"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/builtin.c" (10 Mar 2020, 102212 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "builtin.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.0.1_vs_5.1.0.

    1 /*
    2  * builtin.c - Builtin functions and various utility procedures.
    3  */
    4 
    5 /*
    6  * Copyright (C) 1986, 1988, 1989, 1991-2020,
    7  * the Free Software Foundation, Inc.
    8  *
    9  * This file is part of GAWK, the GNU implementation of the
   10  * AWK Programming Language.
   11  *
   12  * GAWK is free software; you can redistribute it and/or modify
   13  * it under the terms of the GNU General Public License as published by
   14  * the Free Software Foundation; either version 3 of the License, or
   15  * (at your option) any later version.
   16  *
   17  * GAWK is distributed in the hope that it will be useful,
   18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   20  * GNU General Public License for more details.
   21  *
   22  * You should have received a copy of the GNU General Public License
   23  * along with this program; if not, write to the Free Software
   24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
   25  */
   26 
   27 
   28 #include "awk.h"
   29 #if defined(HAVE_FCNTL_H)
   30 #include <fcntl.h>
   31 #endif
   32 #include <math.h>
   33 #include "random.h"
   34 #include "floatmagic.h"
   35 
   36 #if defined(HAVE_POPEN_H)
   37 #include "popen.h"
   38 #endif
   39 
   40 #ifndef CHAR_BIT
   41 # define CHAR_BIT 8
   42 #endif
   43 
   44 /* The extra casts work around common compiler bugs.  */
   45 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
   46 /* Note:  these assume that negative integers are represented internally
   47    via 2's complement, which is not mandated by C.  They also ignore the
   48    fact that signed integer arithmetic overflow can trigger exceptions,
   49    unlike unsigned which is guaranteed not to do so. */
   50 #define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
   51                   ? ~ (uintmax_t) 0 << (sizeof (t) * CHAR_BIT - 1) \
   52                   : 0))
   53 #define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))
   54 
   55 #ifndef INTMAX_MIN
   56 # define INTMAX_MIN TYPE_MINIMUM (intmax_t)
   57 #endif
   58 #ifndef UINTMAX_MAX
   59 # define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
   60 #endif
   61 
   62 #ifndef SIZE_MAX    /* C99 constant, can't rely on it everywhere */
   63 #define SIZE_MAX ((size_t) -1)
   64 #endif
   65 
   66 #define DEFAULT_G_PRECISION 6
   67 
   68 static size_t mbc_byte_count(const char *ptr, size_t numchars);
   69 static size_t mbc_char_count(const char *ptr, size_t numbytes);
   70 
   71 /* Can declare these, since we always use the random shipped with gawk */
   72 extern char *initstate(unsigned long seed, char *state, long n);
   73 extern char *setstate(char *state);
   74 extern long random(void);
   75 extern void srandom(unsigned long seed);
   76 
   77 extern NODE **args_array;
   78 extern int max_args;
   79 extern NODE **fields_arr;
   80 extern bool output_is_tty;
   81 extern FILE *output_fp;
   82 
   83 
   84 #define POP_TWO_SCALARS(s1, s2) \
   85 s2 = POP_SCALAR(); \
   86 s1 = POP(); \
   87 do { if (s1->type == Node_var_array) { \
   88 DEREF(s2); \
   89 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \
   90 }} while (false)
   91 
   92 
   93 /*
   94  * Since we supply the version of random(), we know what
   95  * value to use here.
   96  */
   97 #define GAWK_RANDOM_MAX 0x7fffffffL
   98 
   99 /* efwrite --- like fwrite, but with error checking */
  100 
  101 static void
  102 efwrite(const void *ptr,
  103     size_t size,
  104     size_t count,
  105     FILE *fp,
  106     const char *from,
  107     struct redirect *rp,
  108     bool flush)
  109 {
  110     errno = 0;
  111     if (rp != NULL) {
  112         if (rp->output.gawk_fwrite(ptr, size, count, fp, rp->output.opaque) != count)
  113             goto wrerror;
  114     } else if (fwrite(ptr, size, count, fp) != count)
  115         goto wrerror;
  116     if (flush
  117       && ((fp == stdout && output_is_tty)
  118           || (rp != NULL && (rp->flag & RED_NOBUF) != 0))) {
  119         if (rp != NULL) {
  120             rp->output.gawk_fflush(fp, rp->output.opaque);
  121             if (rp->output.gawk_ferror(fp, rp->output.opaque))
  122                 goto wrerror;
  123         } else {
  124             fflush(fp);
  125             if (ferror(fp))
  126                 goto wrerror;
  127         }
  128     }
  129     return;
  130 
  131 wrerror:
  132 #ifdef __MINGW32__
  133     if (errno == 0 || errno == EINVAL)
  134         w32_maybe_set_errno();
  135 #endif
  136     /* for stdout, die with a real SIGPIPE, like other awks */
  137     if (fp == stdout && errno == EPIPE)
  138         die_via_sigpipe();
  139 
  140     /* otherwise die verbosely */
  141     if ((rp != NULL) ? is_non_fatal_redirect(rp->value, strlen(rp->value)) : is_non_fatal_std(fp))
  142         update_ERRNO_int(errno);
  143     else
  144         fatal(_("%s to \"%s\" failed: %s"), from,
  145             rp != NULL
  146                 ? rp->value
  147                 : fp == stdout
  148                     ? _("standard output")
  149                     : _("standard error"),
  150             errno ? strerror(errno) : _("reason unknown"));
  151 }
  152 
  153 /* do_exp --- exponential function */
  154 
  155 NODE *
  156 do_exp(int nargs)
  157 {
  158     NODE *tmp;
  159     double d, res;
  160 
  161     tmp = POP_SCALAR();
  162     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
  163         lintwarn(_("%s: received non-numeric argument"), "exp");
  164     d = force_number(tmp)->numbr;
  165     DEREF(tmp);
  166     errno = 0;
  167     res = exp(d);
  168     if (errno == ERANGE)
  169         warning(_("exp: argument %g is out of range"), d);
  170     return make_number((AWKNUM) res);
  171 }
  172 
  173 /* stdfile --- return fp for a standard file */
  174 
  175 /*
  176  * This function allows `fflush("/dev/stdout")' to work.
  177  * The other files will be available via getredirect().
  178  * /dev/stdin is not included, since fflush is only for output.
  179  */
  180 
  181 static FILE *
  182 stdfile(const char *name, size_t len)
  183 {
  184     if (len == 11) {
  185         if (strncmp(name, "/dev/stderr", 11) == 0)
  186             return stderr;
  187         else if (strncmp(name, "/dev/stdout", 11) == 0)
  188             return stdout;
  189     }
  190 
  191     return NULL;
  192 }
  193 
  194 /* do_fflush --- flush output, either named file or pipe or everything */
  195 
  196 NODE *
  197 do_fflush(int nargs)
  198 {
  199     struct redirect *rp;
  200     NODE *tmp;
  201     FILE *fp;
  202     int status = 0;
  203     const char *file;
  204     int len;
  205 
  206     /*
  207      * November, 2012.
  208      * It turns out that circa 2002, when BWK
  209      * added fflush() and fflush("") to his awk, he made both of
  210      * them flush everything.
  211      *
  212      * Now, with our inside agent getting ready to try to get fflush()
  213      * standardized in POSIX, we are going to make our awk consistent
  214      * with his.  This should not really affect anyone, as flushing
  215      * everything also flushes stdout.
  216      *
  217      * So. Once upon a time:
  218      *  fflush()    --- flush stdout
  219      *  fflush("")  --- flush everything
  220      * Now, both calls flush everything.
  221      */
  222 
  223     /* fflush() */
  224     if (nargs == 0) {
  225         status = flush_io();    // ERRNO updated
  226         return make_number((AWKNUM) status);
  227     }
  228 
  229     tmp = POP_STRING();
  230     file = tmp->stptr;
  231     len = tmp->stlen;
  232 
  233     /* fflush("") */
  234     if (tmp->stlen == 0) {
  235         status = flush_io();    // ERRNO updated
  236         DEREF(tmp);
  237         return make_number((AWKNUM) status);
  238     }
  239 
  240     /* fflush("/some/path") */
  241     rp = getredirect(tmp->stptr, tmp->stlen);
  242     status = -1;
  243     if (rp != NULL) {
  244         if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
  245             if ((rp->flag & RED_PIPE) != 0)
  246                 warning(_("fflush: cannot flush: pipe `%.*s' opened for reading, not writing"),
  247                     len, file);
  248             else
  249                 warning(_("fflush: cannot flush: file `%.*s' opened for reading, not writing"),
  250                     len, file);
  251             DEREF(tmp);
  252             return make_number((AWKNUM) status);
  253         }
  254         fp = rp->output.fp;
  255         if (fp != NULL) {
  256             status = rp->output.gawk_fflush(fp, rp->output.opaque);
  257 
  258             if (status != 0) {
  259                 if (! is_non_fatal_redirect(tmp->stptr, tmp->stlen))
  260                     fatal(_("fflush: cannot flush file `%.*s': %s"),
  261                         len, file, strerror(errno));
  262                 update_ERRNO_int(errno);
  263             }
  264         } else if ((rp->flag & RED_TWOWAY) != 0)
  265                 warning(_("fflush: cannot flush: two-way pipe `%.*s' has closed write end"),
  266                     len, file);
  267     } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
  268         status = (non_fatal_flush_std_file(fp) == false);
  269     } else {
  270         status = -1;
  271         warning(_("fflush: `%.*s' is not an open file, pipe or co-process"), len, file);
  272     }
  273     DEREF(tmp);
  274     return make_number((AWKNUM) status);
  275 }
  276 
  277 /* strncasecmpmbs --- like strncasecmp (multibyte string version)  */
  278 
  279 int
  280 strncasecmpmbs(const unsigned char *s1, const unsigned char *s2, size_t n)
  281 {
  282     size_t i1, i2, mbclen1, mbclen2, gap;
  283     wchar_t wc1, wc2;
  284     mbstate_t mbs1, mbs2;
  285 
  286     memset(& mbs1, 0, sizeof(mbs1));
  287     memset(& mbs2, 0, sizeof(mbs2));
  288 
  289     for (i1 = i2 = 0 ; i1 < n && i2 < n ;i1 += mbclen1, i2 += mbclen2) {
  290         if (is_valid_character(s1[i1])) {
  291             mbclen1 = 1;
  292             wc1 = btowc_cache(s1[i1]);
  293         } else {
  294             mbclen1 = mbrtowc(& wc1, (const char *)s1 + i1,
  295                       n - i1, & mbs1);
  296             if (mbclen1 == (size_t) -1 || mbclen1 == (size_t) -2 || mbclen1 == 0) {
  297                 /* We treat it as a singlebyte character. */
  298                 mbclen1 = 1;
  299                 wc1 = btowc_cache(s1[i1]);
  300             }
  301         }
  302         if (is_valid_character(s2[i2])) {
  303             mbclen2 = 1;
  304             wc2 = btowc_cache(s2[i2]);
  305         } else {
  306             mbclen2 = mbrtowc(& wc2, (const char *)s2 + i2,
  307                       n - i2, & mbs2);
  308             if (mbclen2 == (size_t) -1 || mbclen2 == (size_t) -2 || mbclen2 == 0) {
  309                 /* We treat it as a singlebyte character. */
  310                 mbclen2 = 1;
  311                 wc2 = btowc_cache(s2[i2]);
  312             }
  313         }
  314         if ((gap = towlower(wc1) - towlower(wc2)) != 0)
  315             /* s1 and s2 are not equivalent. */
  316             return gap;
  317     }
  318     /* s1 and s2 are equivalent. */
  319     return 0;
  320 }
  321 
  322 /* Inspect the buffer `src' and write the index of each byte to `dest'.
  323    Caller must allocate `dest'.
  324    e.g. str = <mb1(1)>, <mb1(2)>, a, b, <mb2(1)>, <mb2(2)>, <mb2(3)>, c
  325         where mb(i) means the `i'-th byte of a multibyte character.
  326         dest =       1,        2, 1, 1,        1,        2,        3. 1
  327 */
  328 static void
  329 index_multibyte_buffer(char* src, char* dest, int len)
  330 {
  331     int idx, prev_idx;
  332     mbstate_t mbs, prevs;
  333 
  334     memset(& prevs, 0, sizeof(mbstate_t));
  335     for (idx = prev_idx = 0 ; idx < len ; idx++) {
  336         size_t mbclen;
  337         mbs = prevs;
  338         mbclen = mbrlen(src + prev_idx, idx - prev_idx + 1, & mbs);
  339         if (mbclen == (size_t) -1 || mbclen == 1 || mbclen == 0) {
  340             /* singlebyte character.  */
  341             mbclen = 1;
  342             prev_idx = idx + 1;
  343         } else if (mbclen == (size_t) -2) {
  344             /* a part of a multibyte character.  */
  345             mbclen = idx - prev_idx + 1;
  346         } else if (mbclen > 1) {
  347             /* the end of a multibyte character.  */
  348             prev_idx = idx + 1;
  349             prevs = mbs;
  350         } else {
  351             /* Can't reach.  */
  352         }
  353         dest[idx] = mbclen;
  354     }
  355 }
  356 
  357 /* do_index --- find index of a string */
  358 
  359 NODE *
  360 do_index(int nargs)
  361 {
  362     NODE *s1, *s2;
  363     const char *p1, *p2;
  364     size_t l1, l2;
  365     long ret;
  366     bool do_single_byte = false;
  367     mbstate_t mbs1, mbs2;
  368 
  369     if (gawk_mb_cur_max > 1) {
  370         memset(& mbs1, 0, sizeof(mbstate_t));
  371         memset(& mbs2, 0, sizeof(mbstate_t));
  372     }
  373 
  374     POP_TWO_SCALARS(s1, s2);
  375 
  376     if (do_lint) {
  377         if ((fixtype(s1)->flags & STRING) == 0)
  378             lintwarn(_("%s: received non-string first argument"), "index");
  379         if ((fixtype(s2)->flags & STRING) == 0)
  380             lintwarn(_("%s: received non-string second argument"), "index");
  381     }
  382 
  383     s1 = force_string(s1);
  384     s2 = force_string(s2);
  385 
  386     p1 = s1->stptr;
  387     p2 = s2->stptr;
  388     l1 = s1->stlen;
  389     l2 = s2->stlen;
  390     ret = 0;
  391 
  392     /*
  393      * Icky special case, index(foo, "") should return 1,
  394      * since both bwk awk and mawk do, and since match("foo", "")
  395      * returns 1. This makes index("", "") work, too, fwiw.
  396      */
  397     if (l2 == 0) {
  398         ret = 1;
  399         goto out;
  400     }
  401 
  402     if (gawk_mb_cur_max > 1) {
  403         s1 = force_wstring(s1);
  404         s2 = force_wstring(s2);
  405         /*
  406          * If we don't have valid wide character strings, use
  407          * the real bytes.
  408          */
  409         do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
  410                     || (s2->wstlen == 0 && s2->stlen > 0));
  411     }
  412 
  413     /* IGNORECASE will already be false if posix */
  414     if (IGNORECASE) {
  415         while (l1 > 0) {
  416             if (l2 > l1)
  417                 break;
  418             if (! do_single_byte && gawk_mb_cur_max > 1) {
  419                 const wchar_t *pos;
  420 
  421                 pos = wcasestrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
  422                 if (pos == NULL)
  423                     ret = 0;
  424                 else
  425                     ret = pos - s1->wstptr + 1; /* 1-based */
  426                 goto out;
  427             } else {
  428                 /*
  429                  * Could use tolower(*p1) == tolower(*p2) here.
  430                  * See discussion in eval.c as to why not.
  431                  */
  432                 if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
  433                     && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
  434                     ret = 1 + s1->stlen - l1;
  435                     break;
  436                 }
  437                 l1--;
  438                 p1++;
  439             }
  440         }
  441     } else {
  442         while (l1 > 0) {
  443             if (l2 > l1)
  444                 break;
  445             if (*p1 == *p2
  446                 && (l2 == 1 || (l2 > 0 && memcmp(p1, p2, l2) == 0))) {
  447                 ret = 1 + s1->stlen - l1;
  448                 break;
  449             }
  450             if (! do_single_byte && gawk_mb_cur_max > 1) {
  451                 const wchar_t *pos;
  452 
  453                 pos = wstrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
  454                 if (pos == NULL)
  455                     ret = 0;
  456                 else
  457                     ret = pos - s1->wstptr + 1; /* 1-based */
  458                 goto out;
  459             } else {
  460                 l1--;
  461                 p1++;
  462             }
  463         }
  464     }
  465 out:
  466     DEREF(s1);
  467     DEREF(s2);
  468     return make_number((AWKNUM) ret);
  469 }
  470 
  471 /* double_to_int --- convert double to int, used several places */
  472 
  473 double
  474 double_to_int(double d)
  475 {
  476     if (d >= 0)
  477         d = floor(d);
  478     else
  479         d = ceil(d);
  480     return d;
  481 }
  482 
  483 /* do_int --- convert double to int for awk */
  484 
  485 NODE *
  486 do_int(int nargs)
  487 {
  488     NODE *tmp;
  489     double d;
  490 
  491     tmp = POP_SCALAR();
  492     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
  493         lintwarn(_("%s: received non-numeric argument"), "int");
  494     d = force_number(tmp)->numbr;
  495     d = double_to_int(d);
  496     DEREF(tmp);
  497     return make_number((AWKNUM) d);
  498 }
  499 
  500 /* do_isarray --- check if argument is array */
  501 
  502 NODE *
  503 do_isarray(int nargs)
  504 {
  505     NODE *tmp;
  506     int ret = 1;
  507 
  508     tmp = POP();
  509     if (tmp->type != Node_var_array) {
  510         ret = 0;
  511         // could be Node_var_new
  512         if (tmp->type == Node_val)
  513             DEREF(tmp);
  514     }
  515     return make_number((AWKNUM) ret);
  516 }
  517 
  518 /* do_length --- length of a string, array or $0 */
  519 
  520 NODE *
  521 do_length(int nargs)
  522 {
  523     NODE *tmp;
  524     size_t len;
  525 
  526     tmp = POP();
  527     if (tmp->type == Node_var_array) {
  528         static bool warned = false;
  529         unsigned long size;
  530 
  531         if (do_posix)
  532             fatal(_("length: received array argument"));
  533         if (do_lint_extensions && ! warned) {
  534             warned = true;
  535             lintwarn(_("`length(array)' is a gawk extension"));
  536         }
  537 
  538         /*
  539          * Support for deferred loading of array elements requires that
  540          * we use the array length interface even though it isn't
  541          * necessary for the built-in array types.
  542          *
  543          * 1/2015: The deferred arrays are gone, but this is probably
  544          * still a good idea.
  545          */
  546 
  547         size = assoc_length(tmp);
  548         return make_number(size);
  549     }
  550 
  551     assert(tmp->type == Node_val);
  552 
  553     if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
  554         lintwarn(_("%s: received non-string argument"), "length");
  555     tmp = force_string(tmp);
  556 
  557     if (gawk_mb_cur_max > 1) {
  558         tmp = force_wstring(tmp);
  559         len = tmp->wstlen;
  560         /*
  561          * If the bytes don't make a valid wide character
  562          * string, fall back to the bytes themselves.
  563          */
  564          if (len == 0 && tmp->stlen > 0)
  565              len = tmp->stlen;
  566     } else
  567         len = tmp->stlen;
  568 
  569     DEREF(tmp);
  570     return make_number((AWKNUM) len);
  571 }
  572 
  573 /* do_log --- the log function */
  574 
  575 NODE *
  576 do_log(int nargs)
  577 {
  578     NODE *tmp;
  579     double d, arg;
  580 
  581     tmp = POP_SCALAR();
  582     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
  583         lintwarn(_("%s: received non-numeric argument"), "log");
  584     arg = force_number(tmp)->numbr;
  585     if (arg < 0.0)
  586         warning(_("%s: received negative argument %g"), "log", arg);
  587     d = log(arg);
  588     DEREF(tmp);
  589     return make_number((AWKNUM) d);
  590 }
  591 
  592 
  593 #ifdef HAVE_MPFR
  594 
  595 /*
  596  * mpz2mpfr --- convert an arbitrary-precision integer to a float
  597  *  without any loss of precision. The returned value is only
  598  *  good for temporary use.
  599  */
  600 
  601 
  602 static mpfr_ptr
  603 mpz2mpfr(mpz_ptr zi)
  604 {
  605     size_t prec;
  606     static mpfr_t mpfrval;
  607     static bool inited = false;
  608     int tval;
  609 
  610     /* estimate minimum precision for exact conversion */
  611     prec = mpz_sizeinbase(zi, 2);   /* most significant 1 bit position starting at 1 */
  612     prec -= (size_t) mpz_scan1(zi, 0);  /* least significant 1 bit index starting at 0 */
  613     if (prec < MPFR_PREC_MIN)
  614         prec = MPFR_PREC_MIN;
  615     else if (prec > MPFR_PREC_MAX)
  616         prec = MPFR_PREC_MAX;
  617 
  618     if (! inited) {
  619         mpfr_init2(mpfrval, prec);
  620         inited = true;
  621     } else
  622         mpfr_set_prec(mpfrval, prec);
  623     tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
  624     IEEE_FMT(mpfrval, tval);
  625     return mpfrval;
  626 }
  627 #endif
  628 
  629 /*
  630  * format_tree() formats arguments of sprintf,
  631  * and accordingly to a fmt_string providing a format like in
  632  * printf family from C library.  Returns a string node which value
  633  * is a formatted string.  Called by  sprintf function.
  634  *
  635  * It is one of the uglier parts of gawk.  Thanks to Michal Jaegermann
  636  * for taming this beast and making it compatible with ANSI C.
  637  */
  638 
  639 NODE *
  640 format_tree(
  641     const char *fmt_string,
  642     size_t n0,
  643     NODE **the_args,
  644     long num_args)
  645 {
  646 /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
  647 /* difference of pointers should be of ptrdiff_t type, but let us be kind */
  648 #define bchunk(s, l) if (l) { \
  649     while ((l) > ofre) { \
  650         size_t olen = obufout - obuf; \
  651         erealloc(obuf, char *, osiz * 2, "format_tree"); \
  652         ofre += osiz; \
  653         osiz *= 2; \
  654         obufout = obuf + olen; \
  655     } \
  656     memcpy(obufout, s, (size_t) (l)); \
  657     obufout += (l); \
  658     ofre -= (l); \
  659 }
  660 
  661 /* copy one byte from 's' to 'obufout' checking for space in the process */
  662 #define bchunk_one(s) { \
  663     if (ofre < 1) { \
  664         size_t olen = obufout - obuf; \
  665         erealloc(obuf, char *, osiz * 2, "format_tree"); \
  666         ofre += osiz; \
  667         osiz *= 2; \
  668         obufout = obuf + olen; \
  669     } \
  670     *obufout++ = *s; \
  671     --ofre; \
  672 }
  673 
  674 /* Is there space for something L big in the buffer? */
  675 #define chksize(l)  if ((l) >= ofre) { \
  676     size_t olen = obufout - obuf; \
  677     size_t delta = osiz+l-ofre; \
  678     erealloc(obuf, char *, osiz + delta, "format_tree"); \
  679     obufout = obuf + olen; \
  680     ofre += delta; \
  681     osiz += delta; \
  682 }
  683 
  684     size_t cur_arg = 0;
  685     NODE *r = NULL;
  686     int i, nc;
  687     bool toofew = false;
  688     char *obuf, *obufout;
  689     size_t osiz, ofre, olen_final;
  690     const char *chbuf;
  691     const char *s0, *s1;
  692     int cs1;
  693     NODE *arg;
  694     long fw, prec, argnum;
  695     bool used_dollar;
  696     bool lj, alt, big_flag, bigbig_flag, small_flag, have_prec, need_format;
  697     long *cur = NULL;
  698     uintmax_t uval;
  699     bool sgn;
  700     int base;
  701     /*
  702      * Although this is an array, the elements serve two different
  703      * purposes. The first element is the general buffer meant
  704      * to hold the entire result string.  The second one is a
  705      * temporary buffer for large floating point values. They
  706      * could just as easily be separate variables, and the
  707      * code might arguably be clearer.
  708      */
  709     struct {
  710         char *buf;
  711         size_t bufsize;
  712         char stackbuf[30];
  713     } cpbufs[2];
  714 #define cpbuf   cpbufs[0].buf
  715     char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
  716     char *cp;
  717     const char *fill;
  718     AWKNUM tmpval = 0.0;
  719     char signchar = '\0';
  720     size_t len;
  721     bool zero_flag = false;
  722     bool quote_flag = false;
  723     int ii, jj;
  724     char *chp;
  725     size_t copy_count, char_count;
  726     char *nan_inf_val;
  727     bool magic_posix_flag;
  728 #ifdef HAVE_MPFR
  729     mpz_ptr zi;
  730     mpfr_ptr mf;
  731 #endif
  732     enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT } fmt_type;
  733 
  734     static const char sp[] = " ";
  735     static const char zero_string[] = "0";
  736     static const char lchbuf[] = "0123456789abcdef";
  737     static const char Uchbuf[] = "0123456789ABCDEF";
  738 
  739 #define INITIAL_OUT_SIZE    64
  740     emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
  741     obufout = obuf;
  742     osiz = INITIAL_OUT_SIZE;
  743     ofre = osiz - 1;
  744 
  745     cur_arg = 1;
  746 
  747     {
  748         size_t k;
  749         for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
  750             cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
  751             cpbufs[k].buf = cpbufs[k].stackbuf;
  752         }
  753     }
  754 
  755     /*
  756      * The point of this goop is to grow the buffer
  757      * holding the converted number, so that large
  758      * values don't overflow a fixed length buffer.
  759      */
  760 #define PREPEND(CH) do {    \
  761     if (cp == cpbufs[0].buf) {  \
  762         char *prev = cpbufs[0].buf; \
  763         emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
  764             "format_tree"); \
  765         memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev,    \
  766                cpbufs[0].bufsize);  \
  767         cpbufs[0].bufsize *= 2; \
  768         if (prev != cpbufs[0].stackbuf) \
  769             efree(prev);    \
  770         cend = cpbufs[0].buf+cpbufs[0].bufsize; \
  771     }   \
  772     *--cp = (CH);   \
  773 } while(0)
  774 
  775     /*
  776      * Check first for use of `count$'.
  777      * If plain argument retrieval was used earlier, choke.
  778      *  Otherwise, return the requested argument.
  779      * If not `count$' now, but it was used earlier, choke.
  780      * If this format is more than total number of args, choke.
  781      * Otherwise, return the current argument.
  782      */
  783 #define parse_next_arg() { \
  784     if (argnum > 0) { \
  785         if (cur_arg > 1) { \
  786             msg(_("fatal: must use `count$' on all formats or none")); \
  787             goto out; \
  788         } \
  789         arg = the_args[argnum]; \
  790     } else if (used_dollar) { \
  791         msg(_("fatal: must use `count$' on all formats or none")); \
  792         arg = 0; /* shutup the compiler */ \
  793         goto out; \
  794     } else if (cur_arg >= num_args) { \
  795         arg = 0; /* shutup the compiler */ \
  796         toofew = true; \
  797         break; \
  798     } else { \
  799         arg = the_args[cur_arg]; \
  800         cur_arg++; \
  801     } \
  802 }
  803 
  804     need_format = false;
  805     used_dollar = false;
  806 
  807     s0 = s1 = fmt_string;
  808     while (n0-- > 0) {
  809         if (*s1 != '%') {
  810             s1++;
  811             continue;
  812         }
  813         need_format = true;
  814         bchunk(s0, s1 - s0);
  815         s0 = s1;
  816         cur = &fw;
  817         fw = 0;
  818         prec = 0;
  819         base = 0;
  820         argnum = 0;
  821         base = 0;
  822         have_prec = false;
  823         signchar = '\0';
  824         zero_flag = false;
  825         quote_flag = false;
  826         nan_inf_val = NULL;
  827 #ifdef HAVE_MPFR
  828         mf = NULL;
  829         zi = NULL;
  830 #endif
  831         fmt_type = MP_NONE;
  832 
  833         lj = alt = big_flag = bigbig_flag = small_flag = false;
  834         magic_posix_flag = false;
  835         fill = sp;
  836         cp = cend;
  837         chbuf = lchbuf;
  838         s1++;
  839 
  840 retry:
  841         if (n0-- == 0)  /* ran out early! */
  842             break;
  843 
  844         switch (cs1 = *s1++) {
  845         case (-1):  /* dummy case to allow for checking */
  846 check_pos:
  847             if (cur != &fw)
  848                 break;      /* reject as a valid format */
  849             goto retry;
  850         case '%':
  851             need_format = false;
  852             /*
  853              * 29 Oct. 2002:
  854              * The C99 standard pages 274 and 279 seem to imply that
  855              * since there's no arg converted, the field width doesn't
  856              * apply.  The code already was that way, but this
  857              * comment documents it, at least in the code.
  858              */
  859             if (do_lint) {
  860                 const char *msg = NULL;
  861 
  862                 if (fw && ! have_prec)
  863                     msg = _("field width is ignored for `%%' specifier");
  864                 else if (fw == 0 && have_prec)
  865                     msg = _("precision is ignored for `%%' specifier");
  866                 else if (fw && have_prec)
  867                     msg = _("field width and precision are ignored for `%%' specifier");
  868 
  869                 if (msg != NULL)
  870                     lintwarn("%s", msg);
  871             }
  872             bchunk_one("%");
  873             s0 = s1;
  874             break;
  875 
  876         case '0':
  877             /*
  878              * Only turn on zero_flag if we haven't seen
  879              * the field width or precision yet.  Otherwise,
  880              * screws up floating point formatting.
  881              */
  882             if (cur == & fw)
  883                 zero_flag = true;
  884             if (lj)
  885                 goto retry;
  886             /* FALL through */
  887         case '1':
  888         case '2':
  889         case '3':
  890         case '4':
  891         case '5':
  892         case '6':
  893         case '7':
  894         case '8':
  895         case '9':
  896             if (cur == NULL)
  897                 break;
  898             if (prec >= 0)
  899                 *cur = cs1 - '0';
  900             /*
  901              * with a negative precision *cur is already set
  902              * to -1, so it will remain negative, but we have
  903              * to "eat" precision digits in any case
  904              */
  905             while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
  906                 --n0;
  907                 *cur = *cur * 10 + *s1++ - '0';
  908             }
  909             if (prec < 0)   /* negative precision is discarded */
  910                 have_prec = false;
  911             if (cur == &prec)
  912                 cur = NULL;
  913             if (n0 == 0)    /* badly formatted control string */
  914                 continue;
  915             goto retry;
  916         case '$':
  917             if (do_traditional) {
  918                 msg(_("fatal: `$' is not permitted in awk formats"));
  919                 goto out;
  920             }
  921 
  922             if (cur == &fw) {
  923                 argnum = fw;
  924                 fw = 0;
  925                 used_dollar = true;
  926                 if (argnum <= 0) {
  927                     msg(_("fatal: argument index with `$' must be > 0"));
  928                     goto out;
  929                 }
  930                 if (argnum >= num_args) {
  931                     msg(_("fatal: argument index %ld greater than total number of supplied arguments"), argnum);
  932                     goto out;
  933                 }
  934             } else {
  935                 msg(_("fatal: `$' not permitted after period in format"));
  936                 goto out;
  937             }
  938 
  939             goto retry;
  940         case '*':
  941             if (cur == NULL)
  942                 break;
  943             if (! do_traditional && used_dollar && ! isdigit((unsigned char) *s1)) {
  944                 fatal(_("fatal: must use `count$' on all formats or none"));
  945                 break;  /* silence warnings */
  946             } else if (! do_traditional && isdigit((unsigned char) *s1)) {
  947                 int val = 0;
  948 
  949                 for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) {
  950                     val *= 10;
  951                     val += *s1 - '0';
  952                 }
  953                 if (*s1 != '$') {
  954                     msg(_("fatal: no `$' supplied for positional field width or precision"));
  955                     goto out;
  956                 } else {
  957                     s1++;
  958                     n0--;
  959                 }
  960                 if (val >= num_args) {
  961                     toofew = true;
  962                     break;
  963                 }
  964                 arg = the_args[val];
  965             } else {
  966                 parse_next_arg();
  967             }
  968             (void) force_number(arg);
  969             *cur = get_number_si(arg);
  970             if (*cur < 0 && cur == &fw) {
  971                 *cur = -*cur;
  972                 lj = true;
  973             }
  974             if (cur == &prec) {
  975                 if (*cur >= 0)
  976                     have_prec = true;
  977                 else
  978                     have_prec = false;
  979                 cur = NULL;
  980             }
  981             goto retry;
  982         case ' ':       /* print ' ' or '-' */
  983                     /* 'space' flag is ignored */
  984                     /* if '+' already present  */
  985             if (signchar != false)
  986                 goto check_pos;
  987             /* FALL THROUGH */
  988         case '+':       /* print '+' or '-' */
  989             signchar = cs1;
  990             goto check_pos;
  991         case '-':
  992             if (prec < 0)
  993                 break;
  994             if (cur == &prec) {
  995                 prec = -1;
  996                 goto retry;
  997             }
  998             fill = sp;      /* if left justified then other */
  999             lj = true;  /* filling is ignored */
 1000             goto check_pos;
 1001         case '.':
 1002             if (cur != &fw)
 1003                 break;
 1004             cur = &prec;
 1005             have_prec = true;
 1006             goto retry;
 1007         case '#':
 1008             alt = true;
 1009             goto check_pos;
 1010         case '\'':
 1011 #if defined(HAVE_LOCALE_H)
 1012             quote_flag = true;
 1013             goto check_pos;
 1014 #else
 1015             goto retry;
 1016 #endif
 1017         case 'l':
 1018             if (big_flag)
 1019                 break;
 1020             else {
 1021                 static bool warned = false;
 1022 
 1023                 if (do_lint && ! warned) {
 1024                     lintwarn(_("`l' is meaningless in awk formats; ignored"));
 1025                     warned = true;
 1026                 }
 1027                 if (do_posix) {
 1028                     msg(_("fatal: `l' is not permitted in POSIX awk formats"));
 1029                     goto out;
 1030                 }
 1031             }
 1032             big_flag = true;
 1033             goto retry;
 1034         case 'L':
 1035             if (bigbig_flag)
 1036                 break;
 1037             else {
 1038                 static bool warned = false;
 1039 
 1040                 if (do_lint && ! warned) {
 1041                     lintwarn(_("`L' is meaningless in awk formats; ignored"));
 1042                     warned = true;
 1043                 }
 1044                 if (do_posix) {
 1045                     msg(_("fatal: `L' is not permitted in POSIX awk formats"));
 1046                     goto out;
 1047                 }
 1048             }
 1049             bigbig_flag = true;
 1050             goto retry;
 1051         case 'h':
 1052             if (small_flag)
 1053                 break;
 1054             else {
 1055                 static bool warned = false;
 1056 
 1057                 if (do_lint && ! warned) {
 1058                     lintwarn(_("`h' is meaningless in awk formats; ignored"));
 1059                     warned = true;
 1060                 }
 1061                 if (do_posix) {
 1062                     msg(_("fatal: `h' is not permitted in POSIX awk formats"));
 1063                     goto out;
 1064                 }
 1065             }
 1066             small_flag = true;
 1067             goto retry;
 1068         case 'P':
 1069             if (magic_posix_flag)
 1070                 break;
 1071             magic_posix_flag = true;
 1072             goto retry;
 1073         case 'c':
 1074             need_format = false;
 1075             parse_next_arg();
 1076             /* user input that looks numeric is numeric */
 1077             fixtype(arg);
 1078             if ((arg->flags & NUMBER) != 0) {
 1079                 uval = get_number_uj(arg);
 1080                 if (gawk_mb_cur_max > 1) {
 1081                     char buf[100];
 1082                     wchar_t wc;
 1083                     mbstate_t mbs;
 1084                     size_t count;
 1085 
 1086                     memset(& mbs, 0, sizeof(mbs));
 1087 
 1088                     /* handle systems with too small wchar_t */
 1089                     if (sizeof(wchar_t) < 4 && uval > 0xffff) {
 1090                         if (do_lint)
 1091                             lintwarn(
 1092                         _("[s]printf: value %g is too big for %%c format"),
 1093                                     arg->numbr);
 1094 
 1095                         goto out0;
 1096                     }
 1097 
 1098                     wc = uval;
 1099 
 1100                     count = wcrtomb(buf, wc, & mbs);
 1101                     if (count == 0
 1102                         || count == (size_t) -1) {
 1103                         if (do_lint)
 1104                             lintwarn(
 1105                         _("[s]printf: value %g is not a valid wide character"),
 1106                                     arg->numbr);
 1107 
 1108                         goto out0;
 1109                     }
 1110 
 1111                     memcpy(cpbuf, buf, count);
 1112                     prec = count;
 1113                     cp = cpbuf;
 1114                     goto pr_tail;
 1115                 }
 1116 out0:
 1117                 ;
 1118                 /* else,
 1119                     fall through */
 1120 
 1121                 cpbuf[0] = uval;
 1122                 prec = 1;
 1123                 cp = cpbuf;
 1124                 goto pr_tail;
 1125             }
 1126             /*
 1127              * As per POSIX, only output first character of a
 1128              * string value.  Thus, we ignore any provided
 1129              * precision, forcing it to 1.  (Didn't this
 1130              * used to work? 6/2003.)
 1131              */
 1132             cp = arg->stptr;
 1133             prec = 1;
 1134             /*
 1135              * First character can be multiple bytes if
 1136              * it's a multibyte character. Grr.
 1137              */
 1138             if (gawk_mb_cur_max > 1) {
 1139                 mbstate_t state;
 1140                 size_t count;
 1141 
 1142                 memset(& state, 0, sizeof(state));
 1143                 count = mbrlen(cp, arg->stlen, & state);
 1144                 if (count != (size_t) -1 && count != (size_t) -2 && count > 0) {
 1145                     prec = count;
 1146                     /* may need to increase fw so that padding happens, see pr_tail code */
 1147                     if (fw > 0)
 1148                         fw += count - 1;
 1149                 }
 1150             }
 1151             goto pr_tail;
 1152         case 's':
 1153             need_format = false;
 1154             parse_next_arg();
 1155             arg = force_string(arg);
 1156             if (fw == 0 && ! have_prec)
 1157                 prec = arg->stlen;
 1158             else {
 1159                 char_count = mbc_char_count(arg->stptr, arg->stlen);
 1160                 if (! have_prec || prec > char_count)
 1161                     prec = char_count;
 1162             }
 1163             cp = arg->stptr;
 1164             goto pr_tail;
 1165         case 'd':
 1166         case 'i':
 1167             need_format = false;
 1168             parse_next_arg();
 1169             (void) force_number(arg);
 1170 
 1171             /*
 1172              * Check for Nan or Inf.
 1173              */
 1174             if (out_of_range(arg))
 1175                 goto out_of_range;
 1176 #ifdef HAVE_MPFR
 1177             if (is_mpg_float(arg))
 1178                 goto mpf0;
 1179             else if (is_mpg_integer(arg))
 1180                 goto mpz0;
 1181             else
 1182 #endif
 1183             tmpval = double_to_int(arg->numbr);
 1184 
 1185             /*
 1186              * ``The result of converting a zero value with a
 1187              * precision of zero is no characters.''
 1188              */
 1189             if (have_prec && prec == 0 && tmpval == 0)
 1190                 goto pr_tail;
 1191 
 1192             if (tmpval < 0) {
 1193                 tmpval = -tmpval;
 1194                 sgn = true;
 1195             } else {
 1196                 if (tmpval == -0.0)
 1197                     /* avoid printing -0 */
 1198                     tmpval = 0.0;
 1199                 sgn = false;
 1200             }
 1201             /*
 1202              * Use snprintf return value to tell if there
 1203              * is enough room in the buffer or not.
 1204              */
 1205             while ((i = snprintf(cpbufs[1].buf,
 1206                          cpbufs[1].bufsize, "%.0f",
 1207                          tmpval)) >=
 1208                    cpbufs[1].bufsize) {
 1209                 if (cpbufs[1].buf == cpbufs[1].stackbuf)
 1210                     cpbufs[1].buf = NULL;
 1211                 if (i > 0) {
 1212                     cpbufs[1].bufsize += ((i > cpbufs[1].bufsize) ?
 1213                                   i : cpbufs[1].bufsize);
 1214                 }
 1215                 else
 1216                     cpbufs[1].bufsize *= 2;
 1217                 assert(cpbufs[1].bufsize > 0);
 1218                 erealloc(cpbufs[1].buf, char *,
 1219                      cpbufs[1].bufsize, "format_tree");
 1220             }
 1221             if (i < 1)
 1222                 goto out_of_range;
 1223 #if defined(HAVE_LOCALE_H)
 1224             quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
 1225 #endif
 1226             chp = &cpbufs[1].buf[i-1];
 1227             ii = jj = 0;
 1228             do {
 1229                 PREPEND(*chp);
 1230                 chp--; i--;
 1231 #if defined(HAVE_LOCALE_H)
 1232                 if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
 1233                     if (i) {    /* only add if more digits coming */
 1234                         int k;
 1235                         const char *ts = loc.thousands_sep;
 1236 
 1237                         for (k = strlen(ts) - 1; k >= 0; k--) {
 1238                             PREPEND(ts[k]);
 1239                         }
 1240                     }
 1241                     if (loc.grouping[ii+1] == 0)
 1242                         jj = 0;     /* keep using current val in loc.grouping[ii] */
 1243                     else if (loc.grouping[ii+1] == CHAR_MAX)
 1244                         quote_flag = false;
 1245                     else {
 1246                         ii++;
 1247                         jj = 0;
 1248                     }
 1249                 }
 1250 #endif
 1251             } while (i > 0);
 1252 
 1253             /* add more output digits to match the precision */
 1254             if (have_prec) {
 1255                 while (cend - cp < prec)
 1256                     PREPEND('0');
 1257             }
 1258 
 1259             if (sgn)
 1260                 PREPEND('-');
 1261             else if (signchar)
 1262                 PREPEND(signchar);
 1263             /*
 1264              * When to fill with zeroes is of course not simple.
 1265              * First: No zero fill if left-justifying.
 1266              * Next: There seem to be two cases:
 1267              *  A '0' without a precision, e.g. %06d
 1268              *  A precision with no field width, e.g. %.10d
 1269              * Any other case, we don't want to fill with zeroes.
 1270              */
 1271             if (! lj
 1272                 && ((zero_flag && ! have_prec)
 1273                  || (fw == 0 && have_prec)))
 1274                 fill = zero_string;
 1275             if (prec > fw)
 1276                 fw = prec;
 1277             prec = cend - cp;
 1278             if (fw > prec && ! lj && fill != sp
 1279                 && (*cp == '-' || signchar)) {
 1280                 bchunk_one(cp);
 1281                 cp++;
 1282                 prec--;
 1283                 fw--;
 1284             }
 1285             goto pr_tail;
 1286         case 'X':
 1287             chbuf = Uchbuf; /* FALL THROUGH */
 1288         case 'x':
 1289             base += 6;  /* FALL THROUGH */
 1290         case 'u':
 1291             base += 2;  /* FALL THROUGH */
 1292         case 'o':
 1293             base += 8;
 1294             need_format = false;
 1295             parse_next_arg();
 1296             (void) force_number(arg);
 1297 
 1298             if (out_of_range(arg))
 1299                 goto out_of_range;
 1300 #ifdef HAVE_MPFR
 1301             if (is_mpg_integer(arg)) {
 1302 mpz0:
 1303                 zi = arg->mpg_i;
 1304 
 1305                 if (cs1 != 'd' && cs1 != 'i') {
 1306                     if (mpz_sgn(zi) <= 0) {
 1307                         /*
 1308                          * Negative value or 0 requires special handling.
 1309                          * Unlike MPFR, GMP does not allow conversion
 1310                          * to (u)intmax_t. So we first convert GMP type to
 1311                          * a MPFR type.
 1312                          */
 1313                         mf = mpz2mpfr(zi);
 1314                         goto mpf1;
 1315                     }
 1316                     signchar = '\0';    /* Don't print '+' */
 1317                 }
 1318 
 1319                 /* See comments above about when to fill with zeros */
 1320                 zero_flag = (! lj
 1321                             && ((zero_flag && ! have_prec)
 1322                              || (fw == 0 && have_prec)));
 1323 
 1324                 fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
 1325                 goto fmt0;
 1326 
 1327             } else if (is_mpg_float(arg)) {
 1328 mpf0:
 1329                 mf = arg->mpg_numbr;
 1330                 if (! mpfr_number_p(mf)) {
 1331                     /* inf or NaN */
 1332                     cs1 = 'g';
 1333                     fmt_type = MP_FLOAT;
 1334                     goto fmt1;
 1335                 }
 1336 
 1337                 if (cs1 != 'd' && cs1 != 'i') {
 1338 mpf1:
 1339                     /*
 1340                      * The output of printf("%#.0x", 0) is 0 instead of 0x, hence <= in
 1341                      * the comparison below.
 1342                      */
 1343                     if (mpfr_sgn(mf) <= 0) {
 1344                         if (! mpfr_fits_intmax_p(mf, ROUND_MODE)) {
 1345                             /* -ve number is too large */
 1346                             cs1 = 'g';
 1347                             fmt_type = MP_FLOAT;
 1348                             goto fmt1;
 1349                         }
 1350 
 1351                         tmpval = uval = (uintmax_t) mpfr_get_sj(mf, ROUND_MODE);
 1352                         if (! alt && have_prec && prec == 0 && tmpval == 0)
 1353                             goto pr_tail;   /* printf("%.0x", 0) is no characters */
 1354                         goto int0;
 1355                     }
 1356                     signchar = '\0';    /* Don't print '+' */
 1357                 }
 1358 
 1359                 /* See comments above about when to fill with zeros */
 1360                 zero_flag = (! lj
 1361                             && ((zero_flag && ! have_prec)
 1362                              || (fw == 0 && have_prec)));
 1363 
 1364                 (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ);   /* convert to GMP integer */
 1365                 fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
 1366                 zi = mpzval;
 1367                 goto fmt0;
 1368             } else
 1369 #endif
 1370                 tmpval = arg->numbr;
 1371 
 1372             /*
 1373              * ``The result of converting a zero value with a
 1374              * precision of zero is no characters.''
 1375              *
 1376              * If I remember the ANSI C standard, though,
 1377              * it says that for octal conversions
 1378              * the precision is artificially increased
 1379              * to add an extra 0 if # is supplied.
 1380              * Indeed, in C,
 1381              *  printf("%#.0o\n", 0);
 1382              * prints a single 0.
 1383              */
 1384             if (! alt && have_prec && prec == 0 && tmpval == 0)
 1385                 goto pr_tail;
 1386 
 1387             if (tmpval < 0) {
 1388                 uval = (uintmax_t) (intmax_t) tmpval;
 1389                 if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
 1390                     goto out_of_range;
 1391             } else {
 1392                 uval = (uintmax_t) tmpval;
 1393                 if ((AWKNUM)uval != double_to_int(tmpval))
 1394                     goto out_of_range;
 1395             }
 1396 #ifdef HAVE_MPFR
 1397     int0:
 1398 #endif
 1399 #if defined(HAVE_LOCALE_H)
 1400             quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
 1401 #endif
 1402             /*
 1403              * When to fill with zeroes is of course not simple.
 1404              * First: No zero fill if left-justifying.
 1405              * Next: There seem to be two cases:
 1406              *  A '0' without a precision, e.g. %06d
 1407              *  A precision with no field width, e.g. %.10d
 1408              * Any other case, we don't want to fill with zeroes.
 1409              */
 1410             if (! lj
 1411                 && ((zero_flag && ! have_prec)
 1412                  || (fw == 0 && have_prec)))
 1413                 fill = zero_string;
 1414             ii = jj = 0;
 1415             do {
 1416                 PREPEND(chbuf[uval % base]);
 1417                 uval /= base;
 1418 #if defined(HAVE_LOCALE_H)
 1419                 if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
 1420                     if (uval) { /* only add if more digits coming */
 1421                         int k;
 1422                         const char *ts = loc.thousands_sep;
 1423 
 1424                         for (k = strlen(ts) - 1; k >= 0; k--) {
 1425                             PREPEND(ts[k]);
 1426                         }
 1427                     }
 1428                     if (loc.grouping[ii+1] == 0)
 1429                         jj = 0;     /* keep using current val in loc.grouping[ii] */
 1430                     else if (loc.grouping[ii+1] == CHAR_MAX)
 1431                         quote_flag = false;
 1432                     else {
 1433                         ii++;
 1434                         jj = 0;
 1435                     }
 1436                 }
 1437 #endif
 1438             } while (uval > 0);
 1439 
 1440             /* add more output digits to match the precision */
 1441             if (have_prec) {
 1442                 while (cend - cp < prec)
 1443                     PREPEND('0');
 1444             }
 1445 
 1446             if (alt && tmpval != 0) {
 1447                 if (base == 16) {
 1448                     PREPEND(cs1);
 1449                     PREPEND('0');
 1450                     if (fill != sp) {
 1451                         bchunk(cp, 2);
 1452                         cp += 2;
 1453                         fw -= 2;
 1454                     }
 1455                 } else if (base == 8)
 1456                     PREPEND('0');
 1457             }
 1458             base = 0;
 1459             if (prec > fw)
 1460                 fw = prec;
 1461             prec = cend - cp;
 1462     pr_tail:
 1463             if (! lj) {
 1464                 while (fw > prec) {
 1465                         bchunk_one(fill);
 1466                     fw--;
 1467                 }
 1468             }
 1469             copy_count = prec;
 1470             if (fw == 0 && ! have_prec)
 1471                 ;
 1472             else if (gawk_mb_cur_max > 1) {
 1473                 if (cs1 == 's') {
 1474                     assert(cp == arg->stptr || cp == cpbuf);
 1475                     copy_count = mbc_byte_count(arg->stptr, prec);
 1476                 }
 1477                 /* prec was set by code for %c */
 1478                 /* else
 1479                     copy_count = prec; */
 1480             }
 1481             bchunk(cp, copy_count);
 1482             while (fw > prec) {
 1483                 bchunk_one(fill);
 1484                 fw--;
 1485             }
 1486             s0 = s1;
 1487             break;
 1488 
 1489      out_of_range:
 1490             /*
 1491              * out of range - emergency use of %g format,
 1492              * or format NaN and INF values.
 1493              */
 1494             nan_inf_val = format_nan_inf(arg, cs1);
 1495             if (do_posix || magic_posix_flag || nan_inf_val == NULL) {
 1496                 if (do_lint && ! do_posix && ! magic_posix_flag)
 1497                     lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
 1498                                 (double) tmpval, cs1);
 1499                 tmpval = arg->numbr;
 1500                 if (strchr("aAeEfFgG", cs1) == NULL)
 1501                     cs1 = 'g';
 1502                 goto fmt1;
 1503             } else {
 1504                 if (do_lint)
 1505                     lintwarn(_("[s]printf: value %s is out of range for `%%%c' format"),
 1506                                 nan_inf_val, cs1);
 1507                 bchunk(nan_inf_val, strlen(nan_inf_val));
 1508                 s0 = s1;
 1509                 break;
 1510             }
 1511 
 1512         case 'F':
 1513 #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
 1514             cs1 = 'f';
 1515             /* FALL THROUGH */
 1516 #endif
 1517         case 'g':
 1518         case 'G':
 1519         case 'e':
 1520         case 'f':
 1521         case 'E':
 1522 #if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
 1523         case 'A':
 1524         case 'a':
 1525         {
 1526             static bool warned = false;
 1527 
 1528             if (do_lint && tolower(cs1) == 'a' && ! warned) {
 1529                 warned = true;
 1530                 lintwarn(_("%%%c format is POSIX standard but not portable to other awks"), cs1);
 1531             }
 1532         }
 1533 #endif
 1534             need_format = false;
 1535             parse_next_arg();
 1536             (void) force_number(arg);
 1537 
 1538             if (! is_mpg_number(arg))
 1539                 tmpval = arg->numbr;
 1540 #ifdef HAVE_MPFR
 1541             else if (is_mpg_float(arg)) {
 1542                 mf = arg->mpg_numbr;
 1543                 fmt_type = MP_FLOAT;
 1544             } else {
 1545                 /* arbitrary-precision integer, convert to MPFR float */
 1546                 assert(mf == NULL);
 1547                 mf = mpz2mpfr(arg->mpg_i);
 1548                 fmt_type = MP_FLOAT;
 1549             }
 1550 #endif
 1551             if (out_of_range(arg))
 1552                 goto out_of_range;
 1553 
 1554      fmt1:
 1555             if (! have_prec)
 1556                 prec = DEFAULT_G_PRECISION;
 1557 #ifdef HAVE_MPFR
 1558      fmt0:
 1559 #endif
 1560             chksize(fw + prec + 11);    /* 11 == slop */
 1561             cp = cpbuf;
 1562             *cp++ = '%';
 1563             if (lj)
 1564                 *cp++ = '-';
 1565             if (signchar)
 1566                 *cp++ = signchar;
 1567             if (alt)
 1568                 *cp++ = '#';
 1569             if (zero_flag)
 1570                 *cp++ = '0';
 1571             if (quote_flag)
 1572                 *cp++ = '\'';
 1573 
 1574 #if defined(LC_NUMERIC)
 1575             if (quote_flag && ! use_lc_numeric)
 1576                 setlocale(LC_NUMERIC, "");
 1577 #endif
 1578 
 1579             switch (fmt_type) {
 1580 #ifdef HAVE_MPFR
 1581             case MP_INT_WITH_PREC:
 1582                 sprintf(cp, "*.*Z%c", cs1);
 1583                 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
 1584                          (int) fw, (int) prec, zi)) >= ofre)
 1585                     chksize(nc)
 1586                 break;
 1587             case MP_INT_WITHOUT_PREC:
 1588                 sprintf(cp, "*Z%c", cs1);
 1589                 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
 1590                          (int) fw, zi)) >= ofre)
 1591                     chksize(nc)
 1592                 break;
 1593             case MP_FLOAT:
 1594                 sprintf(cp, "*.*R*%c", cs1);
 1595                 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
 1596                          (int) fw, (int) prec, ROUND_MODE, mf)) >= ofre)
 1597                     chksize(nc)
 1598                 break;
 1599 #endif
 1600             default:
 1601                 if (have_prec || tolower(cs1) != 'a') {
 1602                     sprintf(cp, "*.*%c", cs1);
 1603                     while ((nc = snprintf(obufout, ofre, cpbuf,
 1604                              (int) fw, (int) prec,
 1605                              (double) tmpval)) >= ofre)
 1606                         chksize(nc)
 1607                 } else {
 1608                     // For %a and %A, use the default precision if it
 1609                     // wasn't supplied by the user.
 1610                     sprintf(cp, "*%c", cs1);
 1611                     while ((nc = snprintf(obufout, ofre, cpbuf,
 1612                              (int) fw,
 1613                              (double) tmpval)) >= ofre)
 1614                         chksize(nc)
 1615                 }
 1616             }
 1617 
 1618 #if defined(LC_NUMERIC)
 1619             if (quote_flag && ! use_lc_numeric)
 1620                 setlocale(LC_NUMERIC, "C");
 1621 #endif
 1622 
 1623             len = strlen(obufout);
 1624             ofre -= len;
 1625             obufout += len;
 1626             s0 = s1;
 1627             break;
 1628         default:
 1629             if (do_lint && is_alpha(cs1))
 1630                 lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1);
 1631             break;
 1632         }
 1633         if (toofew) {
 1634             msg("%s\n\t`%s'\n\t%*s%s",
 1635                   _("fatal: not enough arguments to satisfy format string"),
 1636                   fmt_string, (int) (s1 - fmt_string - 1), "",
 1637                   _("^ ran out for this one"));
 1638             goto out;
 1639         }
 1640     }
 1641     if (do_lint) {
 1642         if (need_format)
 1643             lintwarn(
 1644             _("[s]printf: format specifier does not have control letter"));
 1645         if (cur_arg < num_args)
 1646             lintwarn(
 1647             _("too many arguments supplied for format string"));
 1648     }
 1649     bchunk(s0, s1 - s0);
 1650     olen_final = obufout - obuf;
 1651 #define GIVE_BACK_SIZE (INITIAL_OUT_SIZE * 2)
 1652     if (ofre > GIVE_BACK_SIZE)
 1653         erealloc(obuf, char *, olen_final + 1, "format_tree");
 1654     r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
 1655     obuf = NULL;
 1656 out:
 1657     {
 1658         size_t k;
 1659         size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
 1660         for (k = 0; k < count; k++) {
 1661             if (cpbufs[k].buf != cpbufs[k].stackbuf)
 1662                 efree(cpbufs[k].buf);
 1663         }
 1664         if (obuf != NULL)
 1665             efree(obuf);
 1666     }
 1667 
 1668     if (r == NULL)
 1669         gawk_exit(EXIT_FATAL);
 1670     return r;
 1671 }
 1672 
 1673 
 1674 /* printf_common --- common code for sprintf and printf */
 1675 
 1676 static NODE *
 1677 printf_common(int nargs)
 1678 {
 1679     int i;
 1680     NODE *r, *tmp;
 1681 
 1682     assert(nargs > 0 && nargs <= max_args);
 1683     for (i = 1; i <= nargs; i++) {
 1684         tmp = args_array[nargs - i] = POP();
 1685         if (tmp->type == Node_var_array) {
 1686             while (--i > 0)
 1687                 DEREF(args_array[nargs - i]);
 1688             fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
 1689         }
 1690     }
 1691 
 1692     args_array[0] = force_string(args_array[0]);
 1693     r = format_tree(args_array[0]->stptr, args_array[0]->stlen, args_array, nargs);
 1694     for (i = 0; i < nargs; i++)
 1695         DEREF(args_array[i]);
 1696     return r;
 1697 }
 1698 
 1699 /* do_sprintf --- perform sprintf */
 1700 
 1701 NODE *
 1702 do_sprintf(int nargs)
 1703 {
 1704     NODE *r;
 1705 
 1706     if (nargs == 0)
 1707         fatal(_("sprintf: no arguments"));
 1708 
 1709     r = printf_common(nargs);
 1710     if (r == NULL)
 1711         gawk_exit(EXIT_FATAL);
 1712     return r;
 1713 }
 1714 
 1715 
 1716 /* do_printf --- perform printf, including redirection */
 1717 
 1718 void
 1719 do_printf(int nargs, int redirtype)
 1720 {
 1721     FILE *fp = NULL;
 1722     NODE *tmp;
 1723     struct redirect *rp = NULL;
 1724     int errflg = 0;
 1725     NODE *redir_exp = NULL;
 1726 
 1727     if (nargs == 0) {
 1728         if (do_traditional) {
 1729             if (do_lint)
 1730                 lintwarn(_("printf: no arguments"));
 1731             if (redirtype != 0) {
 1732                 redir_exp = TOP();
 1733                 if (redir_exp->type != Node_val)
 1734                     fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
 1735                 rp = redirect(redir_exp, redirtype, & errflg, true);
 1736                 DEREF(redir_exp);
 1737                 decr_sp();
 1738             }
 1739             return; /* bwk accepts it silently */
 1740         }
 1741         fatal(_("printf: no arguments"));
 1742     }
 1743 
 1744     if (redirtype != 0) {
 1745         redir_exp = PEEK(nargs);
 1746         if (redir_exp->type != Node_val)
 1747             fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
 1748         rp = redirect(redir_exp, redirtype, & errflg, true);
 1749         if (rp != NULL) {
 1750             if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
 1751                 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
 1752                     update_ERRNO_int(EBADF);
 1753                     return;
 1754                 }
 1755                 (void) close_rp(rp, CLOSE_ALL);
 1756                 fatal(_("printf: attempt to write to closed write end of two-way pipe"));
 1757             }
 1758             fp = rp->output.fp;
 1759         }
 1760         else if (errflg) {
 1761             update_ERRNO_int(errflg);
 1762             return;
 1763         }
 1764     } else if (do_debug)    /* only the debugger can change the default output */
 1765         fp = output_fp;
 1766     else
 1767         fp = stdout;
 1768 
 1769     tmp = printf_common(nargs);
 1770     if (redir_exp != NULL) {
 1771         DEREF(redir_exp);
 1772         decr_sp();
 1773     }
 1774     if (tmp != NULL) {
 1775         if (fp == NULL) {
 1776             DEREF(tmp);
 1777             return;
 1778         }
 1779         efwrite(tmp->stptr, sizeof(char), tmp->stlen, fp, "printf", rp, true);
 1780         if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
 1781             rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
 1782         DEREF(tmp);
 1783     } else
 1784         gawk_exit(EXIT_FATAL);
 1785 }
 1786 
 1787 /* do_sqrt --- do the sqrt function */
 1788 
 1789 NODE *
 1790 do_sqrt(int nargs)
 1791 {
 1792     NODE *tmp;
 1793     double arg;
 1794 
 1795     tmp = POP_SCALAR();
 1796     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
 1797         lintwarn(_("%s: received non-numeric argument"), "sqrt");
 1798     arg = (double) force_number(tmp)->numbr;
 1799     DEREF(tmp);
 1800     if (arg < 0.0)
 1801         warning(_("%s: received negative argument %g"), "sqrt", arg);
 1802     return make_number((AWKNUM) sqrt(arg));
 1803 }
 1804 
 1805 /* do_substr --- do the substr function */
 1806 
 1807 NODE *
 1808 do_substr(int nargs)
 1809 {
 1810     NODE *t1;
 1811     NODE *r;
 1812     size_t indx;
 1813     size_t length = 0;
 1814     double d_index = 0, d_length = 0;
 1815     size_t src_len;
 1816 
 1817     if (nargs == 3) {
 1818         t1 = POP_NUMBER();
 1819         d_length = get_number_d(t1);
 1820         DEREF(t1);
 1821     }
 1822 
 1823     t1 = POP_NUMBER();
 1824     d_index = get_number_d(t1);
 1825     DEREF(t1);
 1826 
 1827     t1 = POP_STRING();
 1828 
 1829     if (nargs == 3) {
 1830         if (! (d_length >= 1)) {
 1831             if (do_lint == DO_LINT_ALL)
 1832                 lintwarn(_("substr: length %g is not >= 1"), d_length);
 1833             else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0))
 1834                 lintwarn(_("substr: length %g is not >= 0"), d_length);
 1835             DEREF(t1);
 1836             /*
 1837              * Return explicit null string instead of doing
 1838              * dupnode(Nnull_string) so that if the result
 1839              * is checked with the combination of length()
 1840              * and lint, no error is reported about using
 1841              * an uninitialized value. Same thing later, too.
 1842              */
 1843             return make_string("", 0);
 1844         }
 1845         if (do_lint) {
 1846             if (double_to_int(d_length) != d_length)
 1847                 lintwarn(
 1848             _("substr: non-integer length %g will be truncated"),
 1849                     d_length);
 1850 
 1851             if (d_length > SIZE_MAX)
 1852                 lintwarn(
 1853             _("substr: length %g too big for string indexing, truncating to %g"),
 1854                     d_length, (double) SIZE_MAX);
 1855         }
 1856         if (d_length < SIZE_MAX)
 1857             length = d_length;
 1858         else
 1859             length = SIZE_MAX;
 1860     }
 1861 
 1862     /* the weird `! (foo)' tests help catch NaN values. */
 1863     if (! (d_index >= 1)) {
 1864         if (do_lint)
 1865             lintwarn(_("substr: start index %g is invalid, using 1"),
 1866                  d_index);
 1867         d_index = 1;
 1868     }
 1869     if (do_lint && double_to_int(d_index) != d_index)
 1870         lintwarn(_("substr: non-integer start index %g will be truncated"),
 1871              d_index);
 1872 
 1873     /* awk indices are from 1, C's are from 0 */
 1874     if (d_index <= SIZE_MAX)
 1875         indx = d_index - 1;
 1876     else
 1877         indx = SIZE_MAX;
 1878 
 1879     if (nargs == 2) {   /* third arg. missing */
 1880         /* use remainder of string */
 1881         length = t1->stlen - indx;  /* default to bytes */
 1882         if (gawk_mb_cur_max > 1) {
 1883             t1 = force_wstring(t1);
 1884             if (t1->wstlen > 0) /* use length of wide char string if we have one */
 1885                 length = t1->wstlen - indx;
 1886         }
 1887         d_length = length;  /* set here in case used in diagnostics, below */
 1888     }
 1889 
 1890     if (t1->stlen == 0) {
 1891         /* substr("", 1, 0) produces a warning only if LINT_ALL */
 1892         if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0)))
 1893             lintwarn(_("substr: source string is zero length"));
 1894         DEREF(t1);
 1895         return make_string("", 0);
 1896     }
 1897 
 1898     /* get total len of input string, for following checks */
 1899     if (gawk_mb_cur_max > 1) {
 1900         t1 = force_wstring(t1);
 1901         src_len = t1->wstlen;
 1902     } else
 1903         src_len = t1->stlen;
 1904 
 1905     if (indx >= src_len) {
 1906         if (do_lint)
 1907             lintwarn(_("substr: start index %g is past end of string"),
 1908                 d_index);
 1909         DEREF(t1);
 1910         return make_string("", 0);
 1911     }
 1912     if (length > src_len - indx) {
 1913         if (do_lint)
 1914             lintwarn(
 1915     _("substr: length %g at start index %g exceeds length of first argument (%lu)"),
 1916             d_length, d_index, (unsigned long int) src_len);
 1917         length = src_len - indx;
 1918     }
 1919 
 1920     /* force_wstring() already called */
 1921     if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
 1922         /* single byte case */
 1923         r = make_string(t1->stptr + indx, length);
 1924     else {
 1925         /* multibyte case, more work */
 1926         size_t result;
 1927         wchar_t *wp;
 1928         mbstate_t mbs;
 1929         char *substr, *cp;
 1930 
 1931         /*
 1932          * Convert the wide chars in t1->wstptr back into m.b. chars.
 1933          * This is pretty grotty, but it's the most straightforward
 1934          * way to do things.
 1935          */
 1936         memset(& mbs, 0, sizeof(mbs));
 1937         emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr");
 1938         wp = t1->wstptr + indx;
 1939         for (cp = substr; length > 0; length--) {
 1940             result = wcrtomb(cp, *wp, & mbs);
 1941             if (result == (size_t) -1)  /* what to do? break seems best */
 1942                 break;
 1943             cp += result;
 1944             wp++;
 1945         }
 1946         *cp = '\0';
 1947         r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
 1948     }
 1949 
 1950     DEREF(t1);
 1951     return r;
 1952 }
 1953 
 1954 /* do_strftime --- format a time stamp */
 1955 
 1956 NODE *
 1957 do_strftime(int nargs)
 1958 {
 1959     NODE *t1, *t2, *t3, *ret;
 1960     struct tm *tm;
 1961     time_t fclock;
 1962     double clock_val;
 1963     char *bufp;
 1964     size_t buflen, bufsize;
 1965     char buf[BUFSIZ];
 1966     const char *format;
 1967     int formatlen;
 1968     bool do_gmt;
 1969     NODE *val = NULL;
 1970     NODE *sub = NULL;
 1971     char save = '\0';   // initialize to avoid compiler warnings
 1972     static const time_t time_t_min = TYPE_MINIMUM(time_t);
 1973     static const time_t time_t_max = TYPE_MAXIMUM(time_t);
 1974 
 1975     /* set defaults first */
 1976     format = def_strftime_format;   /* traditional date format */
 1977     formatlen = strlen(format);
 1978     (void) time(& fclock);  /* current time of day */
 1979     do_gmt = false;
 1980 
 1981     if (PROCINFO_node != NULL) {
 1982         sub = make_string("strftime", 8);
 1983         val = in_array(PROCINFO_node, sub);
 1984         unref(sub);
 1985 
 1986         if (val != NULL) {
 1987             if (do_lint && (fixtype(val)->flags & STRING) == 0)
 1988                 lintwarn(_("strftime: format value in PROCINFO[\"strftime\"] has numeric type"));
 1989             val = force_string(val);
 1990             format = val->stptr;
 1991             formatlen = val->stlen;
 1992         }
 1993     }
 1994 
 1995     t1 = t2 = t3 = NULL;
 1996     if (nargs > 0) {    /* have args */
 1997         NODE *tmp;
 1998 
 1999         if (nargs == 3) {
 2000             t3 = POP_SCALAR();
 2001             do_gmt = boolval(t3);
 2002             DEREF(t3);
 2003         }
 2004 
 2005         if (nargs >= 2) {
 2006             t2 = POP_SCALAR();
 2007             if (do_lint && (fixtype(t2)->flags & NUMBER) == 0)
 2008                 lintwarn(_("%s: received non-numeric second argument"), "strftime");
 2009             (void) force_number(t2);
 2010             clock_val = get_number_d(t2);
 2011             fclock = (time_t) clock_val;
 2012             /*
 2013              * Protect against negative value being assigned
 2014              * to unsigned time_t.
 2015              */
 2016             if (clock_val < 0 && fclock > 0) {
 2017                 if (do_lint)
 2018                     lintwarn(_("strftime: second argument less than 0 or too big for time_t"));
 2019                 return make_string("", 0);
 2020             }
 2021 
 2022             /* And check that the value is in range */
 2023             if (clock_val < time_t_min || clock_val > time_t_max) {
 2024                 if (do_lint)
 2025                     lintwarn(_("strftime: second argument out of range for time_t"));
 2026                 return make_string("", 0);
 2027             }
 2028 
 2029             DEREF(t2);
 2030         }
 2031 
 2032         tmp = POP_SCALAR();
 2033         if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
 2034             lintwarn(_("%s: received non-string first argument"), "strftime");
 2035 
 2036         t1 = force_string(tmp);
 2037         format = t1->stptr;
 2038         formatlen = t1->stlen;
 2039         if (formatlen == 0) {
 2040             if (do_lint)
 2041                 lintwarn(_("strftime: received empty format string"));
 2042             DEREF(t1);
 2043             return make_string("", 0);
 2044         }
 2045         str_terminate(t1, save);
 2046     }
 2047 
 2048     if (do_gmt)
 2049         tm = gmtime(& fclock);
 2050     else
 2051         tm = localtime(& fclock);
 2052 
 2053     if (tm == NULL) {
 2054         ret = make_string("", 0);
 2055         goto done;
 2056     }
 2057 
 2058     bufp = buf;
 2059     bufsize = sizeof(buf);
 2060     for (;;) {
 2061         *bufp = '\0';
 2062         buflen = strftime(bufp, bufsize, format, tm);
 2063         /*
 2064          * buflen can be zero EITHER because there's not enough
 2065          * room in the string, or because the control command
 2066          * goes to the empty string. Make a reasonable guess that
 2067          * if the buffer is 1024 times bigger than the length of the
 2068          * format string, it's not failing for lack of room.
 2069          * Thanks to Paul Eggert for pointing out this issue.
 2070          */
 2071         if (buflen > 0 || bufsize >= 1024 * formatlen)
 2072             break;
 2073         bufsize *= 2;
 2074         if (bufp == buf)
 2075             emalloc(bufp, char *, bufsize, "do_strftime");
 2076         else
 2077             erealloc(bufp, char *, bufsize, "do_strftime");
 2078     }
 2079     ret = make_string(bufp, buflen);
 2080     if (bufp != buf)
 2081         efree(bufp);
 2082 done:
 2083     if (t1) {
 2084         str_restore(t1, save);
 2085         DEREF(t1);
 2086     }
 2087     return ret;
 2088 }
 2089 
 2090 /* do_systime --- get the time of day */
 2091 
 2092 NODE *
 2093 do_systime(int nargs ATTRIBUTE_UNUSED)
 2094 {
 2095     time_t lclock;
 2096 
 2097     (void) time(& lclock);
 2098     return make_number((AWKNUM) lclock);
 2099 }
 2100 
 2101 /* do_mktime --- turn a time string into a timestamp */
 2102 
 2103 NODE *
 2104 do_mktime(int nargs)
 2105 {
 2106     NODE *t1, *t2;
 2107     struct tm then;
 2108     long year;
 2109     int month, day, hour, minute, second, count;
 2110     int dst = -1; /* default is unknown */
 2111     time_t then_stamp;
 2112     char save;
 2113     bool do_gmt;
 2114 
 2115     if (nargs == 2) {
 2116         t2 = POP_SCALAR();
 2117         do_gmt = boolval(t2);
 2118         DEREF(t2);
 2119     }
 2120     else
 2121         do_gmt = false;
 2122     t1 = POP_SCALAR();
 2123     if (do_lint && (fixtype(t1)->flags & STRING) == 0)
 2124         lintwarn(_("%s: received non-string argument"), "mktime");
 2125     t1 = force_string(t1);
 2126 
 2127     save = t1->stptr[t1->stlen];
 2128     t1->stptr[t1->stlen] = '\0';
 2129 
 2130     count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d",
 2131                 & year, & month, & day,
 2132             & hour, & minute, & second,
 2133                 & dst);
 2134 
 2135     if (   do_lint /* Ready? Set! Go: */
 2136         && (   (second < 0 || second > 60)
 2137         || (minute < 0 || minute > 59)
 2138         || (hour < 0 || hour > 23) /* FIXME ISO 8601 allows 24 ? */
 2139         || (day < 1 || day > 31)
 2140         || (month < 1 || month > 12) ))
 2141             lintwarn(_("mktime: at least one of the values is out of the default range"));
 2142 
 2143     t1->stptr[t1->stlen] = save;
 2144     DEREF(t1);
 2145 
 2146     if (count < 6
 2147         || month == INT_MIN
 2148         || year < INT_MIN + 1900
 2149         || year - 1900 > INT_MAX)
 2150         return make_number((AWKNUM) -1);
 2151 
 2152     memset(& then, '\0', sizeof(then));
 2153     then.tm_sec = second;
 2154     then.tm_min = minute;
 2155     then.tm_hour = hour;
 2156     then.tm_mday = day;
 2157     then.tm_mon = month - 1;
 2158     then.tm_year = year - 1900;
 2159     then.tm_isdst = dst;
 2160 
 2161     then_stamp = (do_gmt ? timegm(& then) : mktime(& then));
 2162     return make_number((AWKNUM) then_stamp);
 2163 }
 2164 
 2165 /* do_system --- run an external command */
 2166 
 2167 NODE *
 2168 do_system(int nargs)
 2169 {
 2170     NODE *tmp;
 2171     AWKNUM ret = 0;     /* floating point on purpose, compat Unix awk */
 2172     char *cmd;
 2173     char save;
 2174     int status;
 2175 
 2176     if (do_sandbox)
 2177         fatal(_("'system' function not allowed in sandbox mode"));
 2178 
 2179     (void) flush_io();     /* so output is synchronous with gawk's */
 2180     tmp = POP_SCALAR();
 2181     if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
 2182         lintwarn(_("%s: received non-string argument"), "system");
 2183     cmd = force_string(tmp)->stptr;
 2184 
 2185     if (cmd && *cmd) {
 2186         /* insure arg to system is zero-terminated */
 2187         save = cmd[tmp->stlen];
 2188         cmd[tmp->stlen] = '\0';
 2189 
 2190         os_restore_mode(fileno(stdin));
 2191         set_sigpipe_to_default();
 2192 
 2193         status = system(cmd);
 2194         /*
 2195          * 3/2016. What to do with ret? It's never simple.
 2196          * POSIX says to use the full return value. BWK awk
 2197          * divides the result by 256.  That normally gives the
 2198          * exit status but gives a weird result for death-by-signal.
 2199          * So we compromise as follows:
 2200          */
 2201         ret = status;
 2202         if (status != -1) {
 2203             if (do_posix)
 2204                 ;   /* leave it alone, full 16 bits */
 2205             else if (do_traditional)
 2206 #ifdef __MINGW32__
 2207                 ret = (((unsigned)status) & ~0xC0000000);
 2208 #else
 2209                 ret = (status / 256.0);
 2210 #endif
 2211             else
 2212                 ret = sanitize_exit_status(status);
 2213         }
 2214 
 2215         if ((BINMODE & BINMODE_INPUT) != 0)
 2216             os_setbinmode(fileno(stdin), O_BINARY);
 2217         ignore_sigpipe();
 2218 
 2219         cmd[tmp->stlen] = save;
 2220     }
 2221     DEREF(tmp);
 2222     return make_number((AWKNUM) ret);
 2223 }
 2224 
 2225 /* do_print --- print items, separated by OFS, terminated with ORS */
 2226 
 2227 void
 2228 do_print(int nargs, int redirtype)
 2229 {
 2230     struct redirect *rp = NULL;
 2231     int errflg = 0;
 2232     FILE *fp = NULL;
 2233     int i;
 2234     NODE *redir_exp = NULL;
 2235     NODE *tmp = NULL;
 2236 
 2237     assert(nargs <= max_args);
 2238 
 2239     if (redirtype != 0) {
 2240         redir_exp = PEEK(nargs);
 2241         if (redir_exp->type != Node_val)
 2242             fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
 2243         rp = redirect(redir_exp, redirtype, & errflg, true);
 2244         if (rp != NULL) {
 2245             if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
 2246                 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
 2247                     update_ERRNO_int(EBADF);
 2248                     return;
 2249                 }
 2250                 (void) close_rp(rp, CLOSE_ALL);
 2251                 fatal(_("print: attempt to write to closed write end of two-way pipe"));
 2252             }
 2253             fp = rp->output.fp;
 2254         }
 2255         else if (errflg) {
 2256             update_ERRNO_int(errflg);
 2257             return;
 2258         }
 2259     } else if (do_debug)    /* only the debugger can change the default output */
 2260         fp = output_fp;
 2261     else
 2262         fp = stdout;
 2263 
 2264     for (i = 1; i <= nargs; i++) {
 2265         tmp = args_array[i] = POP();
 2266         if (tmp->type == Node_var_array) {
 2267             while (--i > 0)
 2268                 DEREF(args_array[i]);
 2269             fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
 2270         }
 2271         // Let force_string_ofmt handle checking if things
 2272         // are already valid.
 2273         args_array[i] = force_string_ofmt(tmp);
 2274         if (args_array[i] != tmp)
 2275             DEREF(tmp);
 2276     }
 2277 
 2278     if (redir_exp != NULL) {
 2279         DEREF(redir_exp);
 2280         decr_sp();
 2281     }
 2282 
 2283     if (fp == NULL) {
 2284         for (i = nargs; i > 0; i--)
 2285             DEREF(args_array[i]);
 2286         return;
 2287     }
 2288 
 2289     for (i = nargs; i > 0; i--) {
 2290         efwrite(args_array[i]->stptr, sizeof(char), args_array[i]->stlen, fp, "print", rp, false);
 2291         DEREF(args_array[i]);
 2292         if (i != 1 && OFSlen > 0)
 2293             efwrite(OFS, sizeof(char), (size_t) OFSlen,
 2294                 fp, "print", rp, false);
 2295 
 2296     }
 2297     if (ORSlen > 0)
 2298         efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);
 2299 
 2300     if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
 2301         rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
 2302 }
 2303 
 2304 /* do_print_rec --- special case printing of $0, for speed */
 2305 
 2306 void
 2307 do_print_rec(int nargs, int redirtype)
 2308 {
 2309     FILE *fp = NULL;
 2310     NODE *f0;
 2311     struct redirect *rp = NULL;
 2312     int errflg = 0;
 2313     NODE *redir_exp = NULL;
 2314 
 2315     assert(nargs == 0);
 2316     if (redirtype != 0) {
 2317         redir_exp = TOP();
 2318         rp = redirect(redir_exp, redirtype, & errflg, true);
 2319         if (rp != NULL) {
 2320             if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
 2321                 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
 2322                     update_ERRNO_int(EBADF);
 2323                     return;
 2324                 }
 2325                 (void) close_rp(rp, CLOSE_ALL);
 2326                 fatal(_("print: attempt to write to closed write end of two-way pipe"));
 2327             }
 2328             fp = rp->output.fp;
 2329         }
 2330         DEREF(redir_exp);
 2331         decr_sp();
 2332     } else
 2333         fp = output_fp;
 2334 
 2335     if (errflg) {
 2336         update_ERRNO_int(errflg);
 2337         return;
 2338     }
 2339 
 2340     if (fp == NULL)
 2341         return;
 2342 
 2343     if (! field0_valid || do_lint)  // lint check for field access in END
 2344         (void) get_field(0L, NULL);
 2345 
 2346     f0 = fields_arr[0];
 2347 
 2348     if (do_lint && (f0->flags & NULL_FIELD) != 0)
 2349         lintwarn(_("reference to uninitialized field `$%d'"), 0);
 2350 
 2351     efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, false);
 2352 
 2353     if (ORSlen > 0)
 2354         efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);
 2355 
 2356     if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
 2357         rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
 2358 }
 2359 
 2360 
 2361 /* is_wupper --- function version of iswupper for passing function pointers */
 2362 
 2363 static int
 2364 is_wupper(wchar_t c)
 2365 {
 2366     return iswupper(c);
 2367 }
 2368 
 2369 /* is_wlower --- function version of iswlower for passing function pointers */
 2370 
 2371 static int
 2372 is_wlower(wchar_t c)
 2373 {
 2374     return iswlower(c);
 2375 }
 2376 
 2377 /* to_wupper --- function version of towupper for passing function pointers */
 2378 
 2379 static int
 2380 to_wlower(wchar_t c)
 2381 {
 2382     return towlower(c);
 2383 }
 2384 
 2385 /* to_wlower --- function version of towlower for passing function pointers */
 2386 
 2387 static int
 2388 to_wupper(wchar_t c)
 2389 {
 2390     return towupper(c);
 2391 }
 2392 
 2393 /* wide_change_case --- generic case converter for wide characters */
 2394 
 2395 static void
 2396 wide_change_case(wchar_t *wstr,
 2397             size_t wlen,
 2398             int (*is_x)(wchar_t c),
 2399             int (*to_y)(wchar_t c))
 2400 {
 2401     size_t i;
 2402     wchar_t *wcp;
 2403 
 2404     for (i = 0, wcp = wstr; i < wlen; i++, wcp++)
 2405         if (is_x(*wcp))
 2406             *wcp = to_y(*wcp);
 2407 }
 2408 
 2409 /* wide_toupper --- map a wide string to upper case */
 2410 
 2411 static void
 2412 wide_toupper(wchar_t *wstr, size_t wlen)
 2413 {
 2414     wide_change_case(wstr, wlen, is_wlower, to_wupper);
 2415 }
 2416 
 2417 /* wide_tolower --- map a wide string to lower case */
 2418 
 2419 static void
 2420 wide_tolower(wchar_t *wstr, size_t wlen)
 2421 {
 2422     wide_change_case(wstr, wlen, is_wupper, to_wlower);
 2423 }
 2424 
 2425 /* do_tolower --- lower case a string */
 2426 
 2427 NODE *
 2428 do_tolower(int nargs)
 2429 {
 2430     NODE *t1, *t2;
 2431 
 2432     t1 = POP_SCALAR();
 2433     if (do_lint && (fixtype(t1)->flags & STRING) == 0)
 2434         lintwarn(_("%s: received non-string argument"), "tolower");
 2435     t1 = force_string(t1);
 2436     t2 = make_string(t1->stptr, t1->stlen);
 2437 
 2438     if (gawk_mb_cur_max == 1) {
 2439         unsigned char *cp, *cp2;
 2440 
 2441         for (cp = (unsigned char *)t2->stptr,
 2442              cp2 = (unsigned char *)(t2->stptr + t2->stlen);
 2443             cp < cp2; cp++)
 2444             if (isupper(*cp))
 2445                 *cp = tolower(*cp);
 2446     } else {
 2447         force_wstring(t2);
 2448         wide_tolower(t2->wstptr, t2->wstlen);
 2449         wstr2str(t2);
 2450     }
 2451 
 2452     DEREF(t1);
 2453     return t2;
 2454 }
 2455 
 2456 /* do_toupper --- upper case a string */
 2457 
 2458 NODE *
 2459 do_toupper(int nargs)
 2460 {
 2461     NODE *t1, *t2;
 2462 
 2463     t1 = POP_SCALAR();
 2464     if (do_lint && (fixtype(t1)->flags & STRING) == 0)
 2465         lintwarn(_("%s: received non-string argument"), "toupper");
 2466     t1 = force_string(t1);
 2467     t2 = make_string(t1->stptr, t1->stlen);
 2468 
 2469     if (gawk_mb_cur_max == 1) {
 2470         unsigned char *cp, *cp2;
 2471 
 2472         for (cp = (unsigned char *)t2->stptr,
 2473              cp2 = (unsigned char *)(t2->stptr + t2->stlen);
 2474             cp < cp2; cp++)
 2475             if (islower(*cp))
 2476                 *cp = toupper(*cp);
 2477     } else {
 2478         force_wstring(t2);
 2479         wide_toupper(t2->wstptr, t2->wstlen);
 2480         wstr2str(t2);
 2481     }
 2482 
 2483     DEREF(t1);
 2484     return t2;
 2485 }
 2486 
 2487 /* do_atan2 --- do the atan2 function */
 2488 
 2489 NODE *
 2490 do_atan2(int nargs)
 2491 {
 2492     NODE *t1, *t2;
 2493     double d1, d2;
 2494 
 2495     POP_TWO_SCALARS(t1, t2);
 2496     if (do_lint) {
 2497         if ((fixtype(t1)->flags & NUMBER) == 0)
 2498             lintwarn(_("%s: received non-numeric first argument"), "atan2");
 2499         if ((fixtype(t2)->flags & NUMBER) == 0)
 2500             lintwarn(_("%s: received non-numeric second argument"), "atan2");
 2501     }
 2502     d1 = force_number(t1)->numbr;
 2503     d2 = force_number(t2)->numbr;
 2504     DEREF(t1);
 2505     DEREF(t2);
 2506     return make_number((AWKNUM) atan2(d1, d2));
 2507 }
 2508 
 2509 /* do_sin --- do the sin function */
 2510 
 2511 NODE *
 2512 do_sin(int nargs)
 2513 {
 2514     NODE *tmp;
 2515     double d;
 2516 
 2517     tmp = POP_SCALAR();
 2518     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
 2519         lintwarn(_("%s: received non-numeric argument"), "sin");
 2520     d = sin((double) force_number(tmp)->numbr);
 2521     DEREF(tmp);
 2522     return make_number((AWKNUM) d);
 2523 }
 2524 
 2525 /* do_cos --- do the cos function */
 2526 
 2527 NODE *
 2528 do_cos(int nargs)
 2529 {
 2530     NODE *tmp;
 2531     double d;
 2532 
 2533     tmp = POP_SCALAR();
 2534     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
 2535         lintwarn(_("%s: received non-numeric argument"), "cos");
 2536     d = cos((double) force_number(tmp)->numbr);
 2537     DEREF(tmp);
 2538     return make_number((AWKNUM) d);
 2539 }
 2540 
 2541 /* do_rand --- do the rand function */
 2542 
 2543 static bool firstrand = true;
 2544 /* Some systems require this array to be integer aligned. Sigh. */
 2545 #define SIZEOF_STATE 256
 2546 static uint32_t istate[SIZEOF_STATE/sizeof(uint32_t)];
 2547 static char *const state = (char *const) istate;
 2548 
 2549 /* ARGSUSED */
 2550 NODE *
 2551 do_rand(int nargs ATTRIBUTE_UNUSED)
 2552 {
 2553     double tmprand;
 2554 #define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
 2555     if (firstrand) {
 2556         (void) initstate((unsigned) 1, state, SIZEOF_STATE);
 2557         /* don't need to srandom(1), initstate() does it for us. */
 2558         firstrand = false;
 2559         setstate(state);
 2560     }
 2561     /*
 2562      * Per historical practice and POSIX, return value N is
 2563      *
 2564      *  0 <= n < 1
 2565      */
 2566     /*
 2567      * Date: Wed, 28 Aug 2013 17:52:46 -0700
 2568      * From: Bob Jewett <jewett@bill.scs.agilent.com>
 2569      *
 2570      * Call random() twice to fill in more bits in the value
 2571      * of the double.  Also, there is a bug in random() such
 2572      * that when the values of successive values are combined
 2573      * like (rand1*rand2)^2, (rand3*rand4)^2,  ...  the
 2574      * resulting time series is not white noise.  The
 2575      * following also seems to fix that bug.
 2576      *
 2577      * The add/subtract 0.5 keeps small bits from filling
 2578      * below 2^-53 in the double, not that anyone should be
 2579      * looking down there.
 2580      *
 2581      * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT)
 2582      * From: "Nelson H. F. Beebe" <beebe@math.utah.edu>
 2583      * (4) The code is typical of many published fragments for converting
 2584      *     from integer to floating-point, and I discuss the serious pitfalls
 2585      *     in my book, because it leads to platform-dependent behavior at the
 2586      *     end points of the interval [0,1]
 2587      *
 2588      * (5) the documentation in the gawk info node says
 2589      *
 2590      *     `rand()'
 2591      *   Return a random number.  The values of `rand()' are uniformly
 2592      *   distributed between zero and one.  The value could be zero but is
 2593      *   never one.(1)
 2594      *
 2595      *     The division by RAND_DIVISOR may not guarantee that 1.0 is never
 2596      *     returned: the programmer forgot the platform-dependent issue of
 2597      *     rounding.
 2598      *
 2599      * For points 4 and 5, the safe way is a loop:
 2600      *
 2601      *         double
 2602      *     rand(void)       // return value in [0.0, 1.0)
 2603      *         {
 2604      *      value = internal_rand();
 2605      *
 2606      *      while (value == 1.0)
 2607      *                 value = internal_rand();
 2608      *
 2609      *      return (value);
 2610      *         }
 2611      */
 2612 
 2613     do {
 2614         long d1, d2;
 2615         /*
 2616          * Do the calls in predictable order to avoid
 2617          * compiler differences in order of evaluation.
 2618          */
 2619         d1 = random();
 2620         d2 = random();
 2621         tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR );
 2622         tmprand -= 0.5;
 2623     } while (tmprand == 1.0);
 2624 
 2625     return make_number((AWKNUM) tmprand);
 2626 }
 2627 
 2628 /* do_srand --- seed the random number generator */
 2629 
 2630 NODE *
 2631 do_srand(int nargs)
 2632 {
 2633     NODE *tmp;
 2634     static long save_seed = 1;
 2635     long ret = save_seed;   /* SVR4 awk srand returns previous seed */
 2636 
 2637     if (firstrand) {
 2638         (void) initstate((unsigned) 1, state, SIZEOF_STATE);
 2639         /* don't need to srandom(1), we're changing the seed below */
 2640         firstrand = false;
 2641         (void) setstate(state);
 2642     }
 2643 
 2644     if (nargs == 0)
 2645         srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
 2646     else {
 2647         tmp = POP_SCALAR();
 2648         if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
 2649             lintwarn(_("%s: received non-numeric argument"), "srand");
 2650         srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr));
 2651         DEREF(tmp);
 2652     }
 2653     return make_number((AWKNUM) ret);
 2654 }
 2655 
 2656 /* do_match --- match a regexp, set RSTART and RLENGTH,
 2657  *  optional third arg is array filled with text of
 2658  *  subpatterns enclosed in parens and start and len info.
 2659  */
 2660 
 2661 NODE *
 2662 do_match(int nargs)
 2663 {
 2664     NODE *tre, *t1, *dest, *it;
 2665     int rstart, len, ii;
 2666     int rlength;
 2667     Regexp *rp;
 2668     regoff_t s;
 2669     char *start;
 2670     char *buf = NULL;
 2671     char buff[100];
 2672     size_t amt, oldamt = 0, ilen, slen;
 2673     char *subsepstr;
 2674     size_t subseplen;
 2675 
 2676     dest = NULL;
 2677     if (nargs == 3) {   /* 3rd optional arg for the subpatterns */
 2678         dest = POP_PARAM();
 2679         if (dest->type != Node_var_array)
 2680             fatal(_("match: third argument is not an array"));
 2681         assoc_clear(dest);
 2682     }
 2683     tre = POP();
 2684     rp = re_update(tre);
 2685     t1 = POP_STRING();
 2686 
 2687     rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START);
 2688     if (rstart >= 0) {  /* match succeded */
 2689         size_t *wc_indices = NULL;
 2690 
 2691         rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);    /* byte length */
 2692         if (rlength > 0 && gawk_mb_cur_max > 1) {
 2693             t1 = str2wstr(t1, & wc_indices);
 2694             rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
 2695             rstart = wc_indices[rstart];
 2696         }
 2697 
 2698         rstart++;   /* now it's 1-based indexing */
 2699 
 2700         /* Build the array only if the caller wants the optional subpatterns */
 2701         if (dest != NULL) {
 2702             subsepstr = SUBSEP_node->var_value->stptr;
 2703             subseplen = SUBSEP_node->var_value->stlen;
 2704 
 2705             for (ii = 0; ii < NUMSUBPATS(rp, t1->stptr); ii++) {
 2706                 /*
 2707                  * Loop over all the subpats; some of them may have
 2708                  * matched even if all of them did not.
 2709                  */
 2710                 if ((s = SUBPATSTART(rp, t1->stptr, ii)) != -1) {
 2711                     size_t subpat_start;
 2712                     size_t subpat_len;
 2713 
 2714                     start = t1->stptr + s;
 2715                     subpat_start = s;
 2716                     subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
 2717                     if (len > 0 && gawk_mb_cur_max > 1) {
 2718                         subpat_start = wc_indices[s];
 2719                         subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
 2720                     }
 2721 
 2722                     it = make_string(start, len);
 2723                     it->flags |= USER_INPUT;
 2724                     assoc_set(dest, make_number((AWKNUM) (ii)), it);;
 2725 
 2726                     sprintf(buff, "%d", ii);
 2727                     ilen = strlen(buff);
 2728                     amt = ilen + subseplen + strlen("length") + 1;
 2729 
 2730                     if (oldamt == 0) {
 2731                         emalloc(buf, char *, amt, "do_match");
 2732                     } else if (amt > oldamt) {
 2733                         erealloc(buf, char *, amt, "do_match");
 2734                     }
 2735                     oldamt = amt;
 2736                     memcpy(buf, buff, ilen);
 2737                     memcpy(buf + ilen, subsepstr, subseplen);
 2738                     memcpy(buf + ilen + subseplen, "start", 6);
 2739 
 2740                     slen = ilen + subseplen + 5;
 2741 
 2742                     assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_start + 1));
 2743 
 2744                     memcpy(buf, buff, ilen);
 2745                     memcpy(buf + ilen, subsepstr, subseplen);
 2746                     memcpy(buf + ilen + subseplen, "length", 7);
 2747 
 2748                     slen = ilen + subseplen + 6;
 2749 
 2750                     assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_len));
 2751                 }
 2752             }
 2753 
 2754             efree(buf);
 2755         }
 2756         if (wc_indices != NULL)
 2757             efree(wc_indices);
 2758     } else {        /* match failed */
 2759         rstart = 0;
 2760         rlength = -1;
 2761     }
 2762 
 2763     DEREF(t1);
 2764     unref(RSTART_node->var_value);
 2765     RSTART_node->var_value = make_number((AWKNUM) rstart);
 2766     unref(RLENGTH_node->var_value);
 2767     RLENGTH_node->var_value = make_number((AWKNUM) rlength);
 2768     return make_number((AWKNUM) rstart);
 2769 }
 2770 
 2771 /* do_sub --- do the work for sub, gsub, and gensub */
 2772 
 2773 /*
 2774  * Gsub can be tricksy; particularly when handling the case of null strings.
 2775  * The following awk code was useful in debugging problems.  It is too bad
 2776  * that it does not readily translate directly into the C code, below.
 2777  *
 2778  * #! /usr/local/bin/mawk -f
 2779  *
 2780  * BEGIN {
 2781  *  true = 1; false = 0
 2782  *  print "--->", mygsub("abc", "b+", "FOO")
 2783  *  print "--->", mygsub("abc", "x*", "X")
 2784  *  print "--->", mygsub("abc", "b*", "X")
 2785  *  print "--->", mygsub("abc", "c", "X")
 2786  *  print "--->", mygsub("abc", "c+", "X")
 2787  *  print "--->", mygsub("abc", "x*$", "X")
 2788  * }
 2789  *
 2790  * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
 2791  * {
 2792  *  origstr = str;
 2793  *  eosflag = nonzeroflag = false
 2794  *  while (match(str, regex)) {
 2795  *      if (RLENGTH > 0) {  # easy case
 2796  *          nonzeroflag = true
 2797  *          if (RSTART == 1) {  # match at front of string
 2798  *              newstr = newstr replace
 2799  *          } else {
 2800  *              newstr = newstr substr(str, 1, RSTART-1) replace
 2801  *          }
 2802  *          str = substr(str, RSTART+RLENGTH)
 2803  *      } else if (nonzeroflag) {
 2804  *          # last match was non-zero in length, and at the
 2805  *          # current character, we get a zero length match,
 2806  *          # which we don't really want, so skip over it
 2807  *          newstr = newstr substr(str, 1, 1)
 2808  *          str = substr(str, 2)
 2809  *          nonzeroflag = false
 2810  *      } else {
 2811  *          # 0-length match
 2812  *          if (RSTART == 1) {
 2813  *              newstr = newstr replace substr(str, 1, 1)
 2814  *              str = substr(str, 2)
 2815  *          } else {
 2816  *              return newstr str replace
 2817  *          }
 2818  *      }
 2819  *      if (length(str) == 0)
 2820  *          if (eosflag)
 2821  *              break
 2822  *          else
 2823  *              eosflag = true
 2824  *  }
 2825  *  if (length(str) > 0)
 2826  *      newstr = newstr str # rest of string
 2827  *
 2828  *  return newstr
 2829  * }
 2830  */
 2831 
 2832 /*
 2833  * 1/2004:  The gawk sub/gsub behavior dates from 1996, when we proposed it
 2834  * for POSIX.  The proposal fell through the cracks, and the 2001 POSIX
 2835  * standard chose a more simple behavior.
 2836  *
 2837  * The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the
 2838  * 2001 standard:
 2839  *
 2840  * sub(ere, repl[, in ])
 2841  *  Substitute the string repl in place of the first instance of the
 2842  *  extended regular expression ERE in string in and return the number of
 2843  *  substitutions. An ampersand ('&') appearing in the string repl shall
 2844  *  be replaced by the string from in that matches the ERE. An ampersand
 2845  *  preceded with a backslash ('\') shall be interpreted as the literal
 2846  *  ampersand character. An occurrence of two consecutive backslashes shall
 2847  *  be interpreted as just a single literal backslash character. Any other
 2848  *  occurrence of a backslash (for example, preceding any other character)
 2849  *  shall be treated as a literal backslash character. Note that if repl is a
 2850  *  string literal (the lexical token STRING; see Grammar (on page 170)), the
 2851  *  handling of the ampersand character occurs after any lexical processing,
 2852  *  including any lexical backslash escape sequence processing. If in is
 2853  *  specified and it is not an lvalue (see Expressions in awk (on page 156)),
 2854  *  the behavior is undefined. If in is omitted, awk shall use the current
 2855  *  record ($0) in its place.
 2856  *
 2857  * 11/2010: The text in the 2008 standard is the same as just quoted.
 2858  * However, POSIX behavior is now the default.  This can change the behavior
 2859  * of awk programs.  The old behavior is not available.
 2860  *
 2861  * 7/2011: Reverted backslash handling to what it used to be. It was in
 2862  * gawk for too long. Should have known better.
 2863  */
 2864 
 2865 /*
 2866  * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
 2867  */
 2868 
 2869 NODE *
 2870 do_sub(int nargs, unsigned int flags)
 2871 {
 2872     char *scan;
 2873     char *bp, *cp;
 2874     char *buf = NULL;
 2875     size_t buflen;
 2876     char *matchend;
 2877     size_t len;
 2878     char *matchstart;
 2879     char *text;
 2880     size_t textlen = 0;
 2881     char *repl;
 2882     char *replend;
 2883     size_t repllen;
 2884     int sofar;
 2885     int ampersands;
 2886     int matches = 0;
 2887     Regexp *rp;
 2888     NODE *rep_node;     /* replacement text */
 2889     NODE *target;       /* string to make sub. in; $0 if none given */
 2890     NODE *tmp;
 2891     NODE **lhs = NULL;
 2892     long how_many = 1;  /* one substitution for sub, also gensub default */
 2893     bool global;
 2894     long current;
 2895     bool lastmatchnonzero;
 2896     char *mb_indices = NULL;
 2897 
 2898     if ((flags & GENSUB) != 0) {
 2899         double d;
 2900         NODE *glob_flag;
 2901 
 2902         tmp = PEEK(3);
 2903         rp = re_update(tmp);
 2904 
 2905         target = POP_STRING();  /* original string */
 2906 
 2907         glob_flag = POP_SCALAR();   /* value of global flag */
 2908         if (   (glob_flag->flags & STRING) != 0
 2909             && glob_flag->stlen > 0
 2910             && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G'))
 2911             how_many = -1;
 2912         else {
 2913             (void) force_number(glob_flag);
 2914             d = get_number_d(glob_flag);
 2915             if (d < 1)
 2916                 how_many = 1;
 2917             else if (d < LONG_MAX)
 2918                 how_many = d;
 2919             else
 2920                 how_many = LONG_MAX;
 2921             if (d <= 0) {
 2922                 (void) force_string(glob_flag);
 2923                 warning(_("gensub: third argument `%.*s' treated as 1"),
 2924                         (int) glob_flag->stlen,
 2925                         glob_flag->stptr);
 2926             }
 2927         }
 2928         DEREF(glob_flag);
 2929     } else {
 2930         /* take care of regexp early, in case re_update is fatal */
 2931 
 2932         tmp = PEEK(2);
 2933         rp = re_update(tmp);
 2934 
 2935         if ((flags & GSUB) != 0)
 2936             how_many = -1;
 2937 
 2938         /* original string */
 2939 
 2940         if ((flags & LITERAL) != 0)
 2941             target = POP_STRING();
 2942         else {
 2943             lhs = POP_ADDRESS();
 2944             target = force_string(*lhs);
 2945         }
 2946     }
 2947 
 2948     global = (how_many == -1);
 2949 
 2950     rep_node = POP_STRING();    /* replacement text */
 2951     decr_sp();      /* regexp, already updated above */
 2952 
 2953     /* do the search early to avoid work on non-match */
 2954     if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 ||
 2955             RESTART(rp, target->stptr) > target->stlen)
 2956         goto done;
 2957 
 2958     target->flags |= STRING;
 2959 
 2960     text = target->stptr;
 2961     textlen = target->stlen;
 2962 
 2963     repl = rep_node->stptr;
 2964     replend = repl + rep_node->stlen;
 2965     repllen = replend - repl;
 2966 
 2967     ampersands = 0;
 2968 
 2969     /*
 2970      * Some systems' malloc() can't handle being called with an
 2971      * argument of zero.  Thus we have to have some special case
 2972      * code to check for `repllen == 0'.  This can occur for
 2973      * something like:
 2974      *  sub(/foo/, "", mystring)
 2975      * for example.
 2976      */
 2977     if (gawk_mb_cur_max > 1 && repllen > 0) {
 2978         emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub");
 2979         index_multibyte_buffer(repl, mb_indices, repllen);
 2980     }
 2981 
 2982     /* compute length of replacement string, number of ampersands */
 2983     for (scan = repl; scan < replend; scan++) {
 2984         if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
 2985             && (*scan == '&')) {
 2986             repllen--;
 2987             ampersands++;
 2988         } else if (*scan == '\\') {
 2989             if ((flags & GENSUB) != 0) {    /* gensub, behave sanely */
 2990                 if (isdigit((unsigned char) scan[1])) {
 2991                     ampersands++;
 2992                     scan++;
 2993                 } else {    /* \q for any q --> q */
 2994                     repllen--;
 2995                     scan++;
 2996                 }
 2997             } else if (do_posix) {
 2998                 /* \& --> &, \\ --> \ */
 2999                 if (scan[1] == '&' || scan[1] == '\\') {
 3000                     repllen--;
 3001                     scan++;
 3002                 } /* else
 3003                     leave alone, it goes into the output */
 3004             } else {
 3005                 /* gawk default behavior since 1996 */
 3006                 if (strncmp(scan, "\\\\\\&", 4) == 0
 3007                     || strncmp(scan, "\\\\\\\\", 4) == 0) { /* 2016: fixed */
 3008                     /* \\\& --> \& */
 3009                     /* \\\\ --> \\ */
 3010                     repllen -= 2;
 3011                     scan += 3;
 3012                 } else if (strncmp(scan, "\\\\&", 3) == 0) {
 3013                     /* \\& --> \<string> */
 3014                     ampersands++;
 3015                     repllen--;
 3016                     scan += 2;
 3017                 } else if (scan[1] == '&') {
 3018                     /* \& --> & */
 3019                     repllen--;
 3020                     scan++;
 3021                 } /* else
 3022                     leave alone, it goes into the output */
 3023             }
 3024         }
 3025     }
 3026 
 3027     lastmatchnonzero = false;
 3028 
 3029     /* guesstimate how much room to allocate; +1 forces > 0 */
 3030     buflen = textlen + (ampersands + 1) * repllen + 1;
 3031     emalloc(buf, char *, buflen + 1, "do_sub");
 3032     buf[buflen] = '\0';
 3033 
 3034     bp = buf;
 3035     for (current = 1;; current++) {
 3036         matches++;
 3037         matchstart = target->stptr + RESTART(rp, target->stptr);
 3038         matchend = target->stptr + REEND(rp, target->stptr);
 3039 
 3040         /*
 3041          * create the result, copying in parts of the original
 3042          * string. note that length of replacement string can
 3043          * vary since ampersand is actual text of regexp match.
 3044          */
 3045 
 3046         /*
 3047          * add 1 to len to handle "empty" case where
 3048          * matchend == matchstart and we force a match on a single
 3049          * char.  Use 'matchend - text' instead of 'matchstart - text'
 3050          * because we may not actually make any substitution depending
 3051          * on the 'global' and 'how_many' values.
 3052          */
 3053         len = matchend - text + repllen
 3054               + ampersands * (matchend - matchstart) + 1;
 3055         sofar = bp - buf;
 3056         while (buflen < (sofar + len + 1)) {
 3057             buflen *= 2;
 3058             erealloc(buf, char *, buflen, "sub_common");
 3059             bp = buf + sofar;
 3060         }
 3061         for (scan = text; scan < matchstart; scan++)
 3062             *bp++ = *scan;
 3063         if (global || current == how_many) {
 3064             /*
 3065              * If the current match matched the null string,
 3066              * and the last match didn't and did a replacement,
 3067              * and the match of the null string is at the front of
 3068              * the text (meaning right after end of the previous
 3069              * replacement), then skip this one.
 3070              */
 3071             if (matchstart == matchend
 3072                 && lastmatchnonzero
 3073                 && matchstart == text) {
 3074                 lastmatchnonzero = false;
 3075                 matches--;
 3076                 goto empty;
 3077             }
 3078             /*
 3079              * If replacing all occurrences, or this is the
 3080              * match we want, copy in the replacement text,
 3081              * making substitutions as we go.
 3082              */
 3083             for (scan = repl; scan < replend; scan++)
 3084                 if (*scan == '&'
 3085                     /*
 3086                      * Don't test repllen here. A simple "&" could
 3087                      * end up with repllen == 0.
 3088                      */
 3089                     && (gawk_mb_cur_max == 1
 3090                         || mb_indices[scan - repl] == 1)
 3091                 ) {
 3092                         for (cp = matchstart; cp < matchend; cp++)
 3093                                 *bp++ = *cp;
 3094                 } else if (*scan == '\\'
 3095                     && (gawk_mb_cur_max == 1
 3096                         || (repllen > 0 && mb_indices[scan - repl] == 1))
 3097                 ) {
 3098                     if (flags & GENSUB) {   /* gensub, behave sanely */
 3099                         if (isdigit((unsigned char) scan[1])) {
 3100                             int dig = scan[1] - '0';
 3101                             if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
 3102                                 char *start, *end;
 3103 
 3104                                 start = target->stptr
 3105                                       + SUBPATSTART(rp, target->stptr, dig);
 3106                                 end = target->stptr
 3107                                       + SUBPATEND(rp, target->stptr, dig);
 3108 
 3109                                 for (cp = start; cp < end; cp++)
 3110                                     *bp++ = *cp;
 3111                             }
 3112                             scan++;
 3113                         } else  /* \q for any q --> q */
 3114                             *bp++ = *++scan;
 3115                     } else if (do_posix) {
 3116                         /* \& --> &, \\ --> \ */
 3117                         if (scan[1] == '&' || scan[1] == '\\')
 3118                             scan++;
 3119                         *bp++ = *scan;
 3120                     } else {
 3121                         /* gawk default behavior since 1996 */
 3122                         if (strncmp(scan, "\\\\\\&", 4) == 0
 3123                             || strncmp(scan, "\\\\\\\\", 4) == 0) { /* 2016: fixed */
 3124                             /* \\\& --> \& */
 3125                             /* \\\\ --> \\ */
 3126                             *bp++ = '\\';
 3127                             *bp++ = scan[3];
 3128                             scan += 3;
 3129                         } else if (strncmp(scan, "\\\\&", 3) == 0) {
 3130                             /* \\& --> \<string> */
 3131                             *bp++ = '\\';
 3132                             for (cp = matchstart; cp < matchend; cp++)
 3133                                 *bp++ = *cp;
 3134                             scan += 2;
 3135                         } else if (scan[1] == '&') {
 3136                             /* \& --> & */
 3137                             *bp++ = '&';
 3138                             scan++;
 3139                         } else
 3140                             *bp++ = *scan;
 3141                     }
 3142                 } else
 3143                     *bp++ = *scan;
 3144             if (matchstart != matchend)
 3145                 lastmatchnonzero = true;
 3146         } else {
 3147             /*
 3148              * don't want this match, skip over it by copying
 3149              * in current text.
 3150              */
 3151             for (cp = matchstart; cp < matchend; cp++)
 3152                 *bp++ = *cp;
 3153         }
 3154     empty:
 3155         /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
 3156         if (matchstart == matchend && matchend < text + textlen) {
 3157             *bp++ = *matchend;
 3158             matchend++;
 3159         }
 3160         textlen = text + textlen - matchend;
 3161         text = matchend;
 3162 
 3163 #if 0
 3164         if (bp - buf > sofar + len)
 3165             fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar));
 3166 #endif
 3167 
 3168         if ((current >= how_many && ! global)
 3169             || ((long) textlen <= 0 && matchstart == matchend)
 3170             || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1)
 3171             break;
 3172 
 3173     }
 3174     sofar = bp - buf;
 3175     if (buflen < (sofar + textlen + 1)) {
 3176         buflen = sofar + textlen + 1;
 3177         erealloc(buf, char *, buflen, "do_sub");
 3178         bp = buf + sofar;
 3179     }
 3180     /*
 3181      * Note that text == matchend, since that assignment is made before
 3182      * exiting the 'for' loop above. Thus we copy in the rest of the
 3183      * original string.
 3184      */
 3185     for (scan = text; scan < text + textlen; scan++)
 3186         *bp++ = *scan;
 3187     *bp = '\0';
 3188     textlen = bp - buf;
 3189 
 3190     if (mb_indices != NULL)
 3191         efree(mb_indices);
 3192 
 3193 done:
 3194     DEREF(rep_node);
 3195 
 3196     if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) {
 3197         efree(buf);
 3198         buf = NULL;
 3199     }
 3200 
 3201     if (flags & GENSUB) {
 3202         if (matches > 0) {
 3203             /* return the result string */
 3204             DEREF(target);
 3205             assert(buf != NULL);
 3206             return make_str_node(buf, textlen, ALREADY_MALLOCED);
 3207         }
 3208 
 3209         /* return the original string */
 3210         return target;
 3211     }
 3212 
 3213     /* For a string literal, must not change the original string. */
 3214     if ((flags & LITERAL) != 0)
 3215         DEREF(target);
 3216     else if (matches > 0) {
 3217         unref(*lhs);
 3218         *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
 3219     }
 3220 
 3221     return make_number((AWKNUM) matches);
 3222 }
 3223 
 3224 /* call_sub --- call do_sub indirectly */
 3225 
 3226 NODE *
 3227 call_sub(const char *name, int nargs)
 3228 {
 3229     unsigned int flags = 0;
 3230     NODE *regex, *replace, *glob_flag;
 3231     NODE **lhs, *rhs;
 3232     NODE *zero = make_number(0.0);
 3233     NODE *result;
 3234 
 3235     if (name[0] == 'g') {
 3236         if (name[1] == 'e')
 3237             flags = GENSUB;
 3238         else
 3239             flags = GSUB;
 3240     }
 3241 
 3242     bool need_free = false;
 3243     if (flags == 0 || flags == GSUB) {
 3244         /* sub or gsub */
 3245         if (nargs != 2)
 3246             fatal(_("%s: can be called indirectly only with two arguments"), name);
 3247 
 3248         replace = POP_STRING();
 3249         regex = POP();  /* the regex */
 3250         /*
 3251          * push regex
 3252          * push replace
 3253          * push $0
 3254          */
 3255         if ((regex->flags & REGEX) != 0)
 3256             regex = regex->typed_re;
 3257         else {
 3258             regex = make_regnode(Node_regex, regex);
 3259             need_free = true;
 3260         }
 3261         PUSH(regex);
 3262         PUSH(replace);
 3263         lhs = r_get_field(zero, (Func_ptr *) 0, true);
 3264         nargs++;
 3265         PUSH_ADDRESS(lhs);
 3266     } else {
 3267         /* gensub */
 3268         if (nargs == 4)
 3269             rhs = POP();
 3270         else
 3271             rhs = NULL;
 3272         glob_flag = POP_STRING();
 3273         replace = POP_STRING();
 3274         regex = POP();  /* the regex */
 3275         /*
 3276          * push regex
 3277          * push replace
 3278          * push glob_flag
 3279          * if (nargs = 3) {
 3280          *   push $0
 3281          *   nargs++
 3282          * }
 3283          */
 3284         if ((regex->flags & REGEX) != 0)
 3285             regex = regex->typed_re;
 3286         else {
 3287             regex = make_regnode(Node_regex, regex);
 3288             need_free = true;
 3289         }
 3290         PUSH(regex);
 3291         PUSH(replace);
 3292         PUSH(glob_flag);
 3293         if (rhs == NULL) {
 3294             lhs = r_get_field(zero, (Func_ptr *) 0, true);
 3295             rhs = *lhs;
 3296             UPREF(rhs);
 3297             PUSH(rhs);
 3298             nargs++;
 3299         }
 3300         else
 3301             PUSH(rhs);
 3302     }
 3303 
 3304     unref(zero);
 3305     result = do_sub(nargs, flags);
 3306 
 3307     if (need_free) {
 3308         refree(regex->re_reg[0]);
 3309         if (regex->re_reg[1] != NULL)
 3310             refree(regex->re_reg[1]);
 3311         freenode(regex);
 3312     }
 3313 
 3314     if (flags != GENSUB)
 3315         reset_record();
 3316     return result;
 3317 }
 3318 
 3319 /* call_match --- call do_match indirectly */
 3320 
 3321 NODE *
 3322 call_match(int nargs)
 3323 {
 3324     NODE *regex, *text, *array;
 3325     NODE *result;
 3326 
 3327     regex = text = array = NULL;
 3328     if (nargs == 3)
 3329         array = POP();
 3330     regex = POP();
 3331 
 3332     /* Don't need to pop the string just to push it back ... */
 3333 
 3334     bool need_free = false;
 3335     if ((regex->flags & REGEX) != 0)
 3336         regex = regex->typed_re;
 3337     else {
 3338         regex = make_regnode(Node_regex, regex);
 3339         need_free = true;
 3340     }
 3341 
 3342     PUSH(regex);
 3343 
 3344     if (array)
 3345         PUSH(array);
 3346 
 3347     result = do_match(nargs);
 3348 
 3349     if (need_free) {
 3350         refree(regex->re_reg[0]);
 3351         if (regex->re_reg[1] != NULL)
 3352             refree(regex->re_reg[1]);
 3353         freenode(regex);
 3354     }
 3355 
 3356     return result;
 3357 }
 3358 
 3359 /* call_split_func --- call do_split or do_pat_split indirectly */
 3360 
 3361 NODE *
 3362 call_split_func(const char *name, int nargs)
 3363 {
 3364     NODE *regex, *seps;
 3365     NODE *result;
 3366 
 3367     regex = seps = NULL;
 3368     if (nargs < 2)
 3369         fatal(_("indirect call to %s requires at least two arguments"),
 3370                 name);
 3371 
 3372     if (nargs == 4)
 3373         seps = POP();
 3374 
 3375     bool need_free = false;
 3376     if (nargs >= 3) {
 3377         regex = POP_STRING();
 3378         if ((regex->flags & REGEX) != 0)
 3379             regex = regex->typed_re;
 3380         else {
 3381             regex = make_regnode(Node_regex, regex);
 3382             need_free = true;
 3383         }
 3384     } else {
 3385         if (name[0] == 's') {
 3386             regex = make_regnode(Node_regex, FS_node->var_value);
 3387             regex->re_flags |= FS_DFLT;
 3388         } else
 3389             regex = make_regnode(Node_regex, FPAT_node->var_value);
 3390 
 3391         need_free = true;
 3392         nargs++;
 3393     }
 3394 
 3395     /* Don't need to pop the string or the data array */
 3396 
 3397     PUSH(regex);
 3398 
 3399     if (seps)
 3400         PUSH(seps);
 3401 
 3402     result = (name[0] == 's') ? do_split(nargs) : do_patsplit(nargs);
 3403 
 3404     if (need_free) {
 3405         refree(regex->re_reg[0]);
 3406         if (regex->re_reg[1] != NULL)
 3407             refree(regex->re_reg[1]);
 3408         freenode(regex);
 3409     }
 3410 
 3411     return result;
 3412 }
 3413 
 3414 /* make_integer - Convert an integer to a number node.  */
 3415 
 3416 static NODE *
 3417 make_integer(uintmax_t n)
 3418 {
 3419     n = adjust_uint(n);
 3420 
 3421     return make_number((AWKNUM) n);
 3422 }
 3423 
 3424 /* do_lshift --- perform a << operation */
 3425 
 3426 NODE *
 3427 do_lshift(int nargs)
 3428 {
 3429     NODE *s1, *s2;
 3430     uintmax_t uval, ushift, res;
 3431     AWKNUM val, shift;
 3432 
 3433     POP_TWO_SCALARS(s1, s2);
 3434     if (do_lint) {
 3435         if ((fixtype(s1)->flags & NUMBER) == 0)
 3436             lintwarn(_("%s: received non-numeric first argument"), "lshift");
 3437         if ((fixtype(s2)->flags & NUMBER) == 0)
 3438             lintwarn(_("%s: received non-numeric second argument"), "lshift");
 3439     }
 3440 
 3441     val = force_number(s1)->numbr;
 3442     shift = force_number(s2)->numbr;
 3443     if (val < 0 || shift < 0)
 3444         fatal(_("lshift(%f, %f): negative values are not allowed"), val, shift);
 3445 
 3446     if (do_lint) {
 3447         if (double_to_int(val) != val || double_to_int(shift) != shift)
 3448             lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift);
 3449         if (shift >= sizeof(uintmax_t) * CHAR_BIT)
 3450             lintwarn(_("lshift(%f, %f): too large shift value will give strange results"), val, shift);
 3451     }
 3452 
 3453     DEREF(s1);
 3454     DEREF(s2);
 3455 
 3456     uval = (uintmax_t) val;
 3457     ushift = (uintmax_t) shift;
 3458 
 3459     res = uval << ushift;
 3460     return make_integer(res);
 3461 }
 3462 
 3463 /* do_rshift --- perform a >> operation */
 3464 
 3465 NODE *
 3466 do_rshift(int nargs)
 3467 {
 3468     NODE *s1, *s2;
 3469     uintmax_t uval, ushift, res;
 3470     AWKNUM val, shift;
 3471 
 3472     POP_TWO_SCALARS(s1, s2);
 3473     if (do_lint) {
 3474         if ((fixtype(s1)->flags & NUMBER) == 0)
 3475             lintwarn(_("%s: received non-numeric first argument"), "rshift");
 3476         if ((fixtype(s2)->flags & NUMBER) == 0)
 3477             lintwarn(_("%s: received non-numeric second argument"), "rshift");
 3478     }
 3479 
 3480     val = force_number(s1)->numbr;
 3481     shift = force_number(s2)->numbr;
 3482     if (val < 0 || shift < 0)
 3483         fatal(_("rshift(%f, %f): negative values are not allowed"), val, shift);
 3484 
 3485     if (do_lint) {
 3486         if (double_to_int(val) != val || double_to_int(shift) != shift)
 3487             lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift);
 3488         if (shift >= sizeof(uintmax_t) * CHAR_BIT)
 3489             lintwarn(_("rshift(%f, %f): too large shift value will give strange results"), val, shift);
 3490     }
 3491 
 3492     DEREF(s1);
 3493     DEREF(s2);
 3494 
 3495     uval = (uintmax_t) val;
 3496     ushift = (uintmax_t) shift;
 3497 
 3498     res = uval >> ushift;
 3499     return make_integer(res);
 3500 }
 3501 
 3502 /* do_and --- perform an & operation */
 3503 
 3504 NODE *
 3505 do_and(int nargs)
 3506 {
 3507     NODE *s1;
 3508     uintmax_t res, uval;
 3509     AWKNUM val;
 3510 
 3511     res = ~(uintmax_t) 0;   /* start off with all ones */
 3512     if (nargs < 2)
 3513         fatal(_("%s: called with less than two arguments"), "and");
 3514 
 3515     for (; nargs > 0; nargs--) {
 3516         s1 = POP_SCALAR();
 3517         if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
 3518             lintwarn(_("%s: argument %d is non-numeric"), "and", nargs);
 3519 
 3520         val = force_number(s1)->numbr;
 3521         if (val < 0)
 3522             fatal(_("%s: argument %d negative value %g is not allowed"), "and", nargs, val);
 3523 
 3524         uval = (uintmax_t) val;
 3525         res &= uval;
 3526 
 3527         DEREF(s1);
 3528     }
 3529 
 3530     return make_integer(res);
 3531 }
 3532 
 3533 /* do_or --- perform an | operation */
 3534 
 3535 NODE *
 3536 do_or(int nargs)
 3537 {
 3538     NODE *s1;
 3539     uintmax_t res, uval;
 3540     AWKNUM val;
 3541 
 3542     res = 0;
 3543     if (nargs < 2)
 3544         fatal(_("%s: called with less than two arguments"), "or");
 3545 
 3546     for (; nargs > 0; nargs--) {
 3547         s1 = POP_SCALAR();
 3548         if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
 3549             lintwarn(_("%s: argument %d is non-numeric"), "or", nargs);
 3550 
 3551         val = force_number(s1)->numbr;
 3552         if (val < 0)
 3553             fatal(_("%s: argument %d negative value %g is not allowed"), "or", nargs, val);
 3554 
 3555         uval = (uintmax_t) val;
 3556         res |= uval;
 3557 
 3558         DEREF(s1);
 3559     }
 3560 
 3561     return make_integer(res);
 3562 }
 3563 
 3564 /* do_xor --- perform an ^ operation */
 3565 
 3566 NODE *
 3567 do_xor(int nargs)
 3568 {
 3569     NODE *s1;
 3570     uintmax_t res, uval;
 3571     AWKNUM val;
 3572 
 3573     if (nargs < 2)
 3574         fatal(_("%s: called with less than two arguments"), "xor");
 3575 
 3576     res = 0;    /* start with all zeroes */
 3577     for (; nargs > 0; nargs--) {
 3578         s1 = POP_SCALAR();
 3579         if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
 3580             lintwarn(_("%s: argument %d is non-numeric"), "xor", nargs);
 3581 
 3582         val = force_number(s1)->numbr;
 3583         if (val < 0)
 3584             fatal(_("%s: argument %d negative value %g is not allowed"), "xor", nargs, val);
 3585 
 3586         uval = (uintmax_t) val;
 3587         res ^= uval;
 3588 
 3589         DEREF(s1);
 3590     }
 3591 
 3592     return make_integer(res);
 3593 }
 3594 
 3595 /* do_compl --- perform a ~ operation */
 3596 
 3597 NODE *
 3598 do_compl(int nargs)
 3599 {
 3600     NODE *tmp;
 3601     double d;
 3602     uintmax_t uval;
 3603 
 3604     tmp = POP_SCALAR();
 3605     if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
 3606         lintwarn(_("%s: received non-numeric argument"), "compl");
 3607     d = force_number(tmp)->numbr;
 3608     DEREF(tmp);
 3609 
 3610     if (d < 0)
 3611         fatal(_("compl(%f): negative value is not allowed"), d);
 3612 
 3613     if (do_lint && double_to_int(d) != d)
 3614         lintwarn(_("compl(%f): fractional value will be truncated"), d);
 3615 
 3616     uval = (uintmax_t) d;
 3617     uval = ~ uval;
 3618     return make_integer(uval);
 3619 }
 3620 
 3621 /* do_strtonum --- the strtonum function */
 3622 
 3623 NODE *
 3624 do_strtonum(int nargs)
 3625 {
 3626     NODE *tmp;
 3627     AWKNUM d;
 3628 
 3629     tmp = fixtype(POP_SCALAR());
 3630     if ((tmp->flags & NUMBER) != 0)
 3631         d = (AWKNUM) tmp->numbr;
 3632     else if (get_numbase(tmp->stptr, tmp->stlen, use_lc_numeric) != 10)
 3633         d = nondec2awknum(tmp->stptr, tmp->stlen, NULL);
 3634     else
 3635         d = (AWKNUM) force_number(tmp)->numbr;
 3636 
 3637     DEREF(tmp);
 3638     return make_number((AWKNUM) d);
 3639 }
 3640 
 3641 /* nondec2awknum --- convert octal or hex value to double */
 3642 
 3643 /*
 3644  * Because of awk's concatenation rules and the way awk.y:yylex()
 3645  * collects a number, this routine has to be willing to stop on the
 3646  * first invalid character.
 3647  */
 3648 
 3649 AWKNUM
 3650 nondec2awknum(char *str, size_t len, char **endptr)
 3651 {
 3652     AWKNUM retval = 0.0;
 3653     char save;
 3654     short val;
 3655     char *start = str;
 3656 
 3657     if (len >= 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
 3658         /*
 3659          * User called strtonum("0x") or some such,
 3660          * so just quit early.
 3661          */
 3662         if (len <= 2) {
 3663             if (endptr)
 3664                 *endptr = start;
 3665             return (AWKNUM) 0.0;
 3666         }
 3667 
 3668         for (str += 2, len -= 2; len > 0; len--, str++) {
 3669             switch (*str) {
 3670             case '0':
 3671             case '1':
 3672             case '2':
 3673             case '3':
 3674             case '4':
 3675             case '5':
 3676             case '6':
 3677             case '7':
 3678             case '8':
 3679             case '9':
 3680                 val = *str - '0';
 3681                 break;
 3682             case 'a':
 3683             case 'b':
 3684             case 'c':
 3685             case 'd':
 3686             case 'e':
 3687             case 'f':
 3688                 val = *str - 'a' + 10;
 3689                 break;
 3690             case 'A':
 3691             case 'B':
 3692             case 'C':
 3693             case 'D':
 3694             case 'E':
 3695             case 'F':
 3696                 val = *str - 'A' + 10;
 3697                 break;
 3698             default:
 3699                 if (endptr)
 3700                     *endptr = str;
 3701                 goto done;
 3702             }
 3703             retval = (retval * 16) + val;
 3704         }
 3705         if (endptr)
 3706             *endptr = str;
 3707     } else if (len >= 1 && *str == '0') {
 3708         for (; len > 0; len--) {
 3709             if (! isdigit((unsigned char) *str)) {
 3710                 if (endptr)
 3711                     *endptr = str;
 3712                 goto done;
 3713             }
 3714             else if (*str == '8' || *str == '9') {
 3715                 str = start;
 3716                 goto decimal;
 3717             }
 3718             retval = (retval * 8) + (*str - '0');
 3719             str++;
 3720         }
 3721         if (endptr)
 3722             *endptr = str;
 3723     } else {
 3724 decimal:
 3725         save = str[len];
 3726         str[len] = '\0';
 3727         retval = strtod(str, endptr);
 3728         str[len] = save;
 3729     }
 3730 done:
 3731     return retval;
 3732 }
 3733 
 3734 /* do_dcgettext, do_dcngettext --- handle i18n translations */
 3735 
 3736 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
 3737 
 3738 static int
 3739 localecategory_from_argument(NODE *t)
 3740 {
 3741     static const struct category_table {
 3742         int val;
 3743         const char *name;
 3744     } cat_tab[] = {
 3745 #ifdef LC_ALL
 3746         { LC_ALL,   "LC_ALL" },
 3747 #endif /* LC_ALL */
 3748 #ifdef LC_COLLATE
 3749         { LC_COLLATE,   "LC_COLLATE" },
 3750 #endif /* LC_COLLATE */
 3751 #ifdef LC_CTYPE
 3752         { LC_CTYPE, "LC_CTYPE" },
 3753 #endif /* LC_CTYPE */
 3754 #ifdef LC_MESSAGES
 3755         { LC_MESSAGES,  "LC_MESSAGES" },
 3756 #endif /* LC_MESSAGES */
 3757 #ifdef LC_MONETARY
 3758         { LC_MONETARY,  "LC_MONETARY" },
 3759 #endif /* LC_MONETARY */
 3760 #ifdef LC_NUMERIC
 3761         { LC_NUMERIC,   "LC_NUMERIC" },
 3762 #endif /* LC_NUMERIC */
 3763 #ifdef LC_RESPONSE
 3764         { LC_RESPONSE,  "LC_RESPONSE" },
 3765 #endif /* LC_RESPONSE */
 3766 #ifdef LC_TIME
 3767         { LC_TIME,  "LC_TIME" },
 3768 #endif /* LC_TIME */
 3769     };
 3770 
 3771     if (t != NULL) {
 3772         int low, high, i, mid;
 3773         char *category;
 3774         int lc_cat = -1;
 3775 
 3776         char save = t->stptr[t->stlen];
 3777         t->stptr[t->stlen] = '\0';
 3778         category = t->stptr;
 3779 
 3780         /* binary search the table */
 3781         low = 0;
 3782         high = (sizeof(cat_tab) / sizeof(cat_tab[0])) - 1;
 3783         while (low <= high) {
 3784             mid = (low + high) / 2;
 3785             i = strcmp(category, cat_tab[mid].name);
 3786 
 3787             if (i < 0)      /* category < mid */
 3788                 high = mid - 1;
 3789             else if (i > 0)     /* category > mid */
 3790                 low = mid + 1;
 3791             else {
 3792                 lc_cat = cat_tab[mid].val;
 3793                 break;
 3794             }
 3795         }
 3796         t->stptr[t->stlen] = save;
 3797         if (lc_cat == -1)   /* not there */
 3798             fatal(_("dcgettext: `%s' is not a valid locale category"), category);
 3799 
 3800         return lc_cat;
 3801     } else
 3802         return LC_MESSAGES;
 3803 }
 3804 
 3805 #endif
 3806 
 3807 /*
 3808  * awk usage is
 3809  *
 3810  *  str = dcgettext(string [, domain [, category]])
 3811  *  str = dcngettext(string1, string2, number [, domain [, category]])
 3812  *
 3813  * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
 3814  */
 3815 
 3816 NODE *
 3817 do_dcgettext(int nargs)
 3818 {
 3819     NODE *tmp, *t1, *t2 = NULL;
 3820     char *string;
 3821     char *the_result;
 3822     size_t reslen;
 3823 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
 3824     int lc_cat;
 3825     char *domain;
 3826     char save1 = '\0', save2 = '\0';
 3827 
 3828     if (nargs == 3) {   /* third argument */
 3829         tmp = POP_STRING();
 3830         lc_cat = localecategory_from_argument(tmp);
 3831         DEREF(tmp);
 3832     } else
 3833         lc_cat = LC_MESSAGES;
 3834 
 3835     if (nargs >= 2) {  /* second argument */
 3836         t2 = POP_STRING();
 3837         domain = t2->stptr;
 3838         str_terminate(t2, save2);
 3839     } else
 3840         domain = TEXTDOMAIN;
 3841 #else
 3842     if (nargs == 3) {
 3843         tmp = POP_STRING();
 3844         DEREF(tmp);
 3845     }
 3846     if (nargs >= 2) {
 3847         t2 = POP_STRING();
 3848         DEREF(t2);
 3849     }
 3850 #endif
 3851 
 3852     t1 = POP_STRING();  /* first argument */
 3853     string = t1->stptr;
 3854 
 3855 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
 3856     str_terminate(t1, save1);
 3857     the_result = dcgettext(domain, string, lc_cat);
 3858     str_restore(t1, save1);
 3859     if (t2 != NULL) {
 3860         str_restore(t2, save2);
 3861         DEREF(t2);
 3862     }
 3863     reslen = strlen(the_result);
 3864 #else
 3865     the_result = string;
 3866     reslen = t1->stlen;
 3867 #endif
 3868     DEREF(t1);
 3869     return make_string(the_result, reslen);
 3870 }
 3871 
 3872 
 3873 NODE *
 3874 do_dcngettext(int nargs)
 3875 {
 3876     NODE *tmp, *t1, *t2, *t3;
 3877     char *string1, *string2;
 3878     unsigned long number;
 3879     AWKNUM d;
 3880     char *the_result;
 3881     size_t reslen;
 3882 
 3883 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
 3884     int lc_cat;
 3885     char *domain;
 3886     char save = '\0', save1 = '\0', save2 = '\0';
 3887     bool saved_end = false;
 3888 
 3889     if (nargs == 5) {   /* fifth argument */
 3890         tmp = POP_STRING();
 3891         lc_cat = localecategory_from_argument(tmp);
 3892         DEREF(tmp);
 3893     } else
 3894         lc_cat = LC_MESSAGES;
 3895 
 3896     t3 = NULL;
 3897     if (nargs >= 4) {   /* fourth argument */
 3898         t3 = POP_STRING();
 3899         domain = t3->stptr;
 3900         save = domain[t3->stlen];
 3901         domain[t3->stlen] = '\0';
 3902         saved_end = true;
 3903     } else
 3904         domain = TEXTDOMAIN;
 3905 #else
 3906     if (nargs == 5) {
 3907         tmp = POP_STRING();
 3908         DEREF(tmp);
 3909     }
 3910     if (nargs >= 4) {
 3911         t3 = POP_STRING();
 3912         DEREF(t3);
 3913     }
 3914 #endif
 3915 
 3916     t2 = POP_NUMBER();  /* third argument */
 3917     d = get_number_d(t2);
 3918     DEREF(t2);
 3919 
 3920     number = (unsigned long) double_to_int(d);
 3921     t2 = POP_STRING();  /* second argument */
 3922     string2 = t2->stptr;
 3923     t1 = POP_STRING();  /* first argument */
 3924     string1 = t1->stptr;
 3925 
 3926 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
 3927 
 3928     str_terminate(t1, save1);
 3929     str_terminate(t2, save2);
 3930     the_result = dcngettext(domain, string1, string2, number, lc_cat);
 3931     reslen = strlen(the_result);
 3932     str_restore(t1, save1);
 3933     str_restore(t2, save2);
 3934     if (saved_end)
 3935         domain[t3->stlen] = save;
 3936     if (t3 != NULL)
 3937         DEREF(t3);
 3938 #else
 3939     if (number == 1) {
 3940         the_result = string1;
 3941         reslen = t1->stlen;
 3942     } else {
 3943         the_result = string2;
 3944         reslen = t2->stlen;
 3945     }
 3946 #endif
 3947     DEREF(t1);
 3948     DEREF(t2);
 3949     return make_string(the_result, reslen);
 3950 }
 3951 
 3952 /* do_bindtextdomain --- set the directory for a text domain */
 3953 
 3954 /*
 3955  * awk usage is
 3956  *
 3957  *  binding = bindtextdomain(dir [, domain])
 3958  *
 3959  * If dir is "", pass NULL to C version.
 3960  * Default domain is TEXTDOMAIN.
 3961  */
 3962 
 3963 NODE *
 3964 do_bindtextdomain(int nargs)
 3965 {
 3966     NODE *t1, *t2;
 3967     const char *directory, *domain;
 3968     const char *the_result;
 3969 
 3970     t1 = t2 = NULL;
 3971     /* set defaults */
 3972     directory = NULL;
 3973     domain = TEXTDOMAIN;
 3974     char save = '\0', save1 = '\0';
 3975 
 3976     if (nargs == 2) {   /* second argument */
 3977         t2 = POP_STRING();
 3978         domain = (const char *) t2->stptr;
 3979         save = t2->stptr[t2->stlen];
 3980         t2->stptr[t2->stlen] = '\0';
 3981     }
 3982 
 3983     /* first argument */
 3984     t1 = POP_STRING();
 3985     if (t1->stlen > 0) {
 3986         directory = (const char *) t1->stptr;
 3987         str_terminate(t1, save1);
 3988     }
 3989 
 3990     the_result = bindtextdomain(domain, directory);
 3991     if (directory)
 3992         str_restore(t1, save1);
 3993 
 3994     DEREF(t1);
 3995     if (t2 != NULL) {
 3996         t2->stptr[t2->stlen] = save;
 3997         DEREF(t2);
 3998     }
 3999 
 4000     return make_string(the_result, strlen(the_result));
 4001 }
 4002 
 4003 #ifdef SUPPLY_INTDIV
 4004 /* do_intdiv --- do integer division, return quotient and remainder in dest array */
 4005 
 4006 /*
 4007  * We define the semantics as:
 4008  *  numerator = int(numerator)
 4009  *  denominator = int(denonmator)
 4010  *  quotient = int(numerator / denomator)
 4011  *  remainder = int(numerator % denomator)
 4012  */
 4013 
 4014 NODE *
 4015 do_intdiv(int nargs)
 4016 {
 4017     NODE *numerator, *denominator, *result;
 4018     double num, denom, quotient, remainder;
 4019 
 4020     result = POP_PARAM();
 4021     if (result->type != Node_var_array)
 4022         fatal(_("intdiv: third argument is not an array"));
 4023     assoc_clear(result);
 4024 
 4025     denominator = POP_SCALAR();
 4026     numerator = POP_SCALAR();
 4027 
 4028     if (do_lint) {
 4029         if ((fixtype(numerator)->flags & NUMBER) == 0)
 4030             lintwarn(_("%s: received non-numeric first argument"), "intdiv");
 4031         if ((fixtype(denominator)->flags & NUMBER) == 0)
 4032             lintwarn(_("%s: received non-numeric second argument"), "intdiv");
 4033     }
 4034 
 4035     (void) force_number(numerator);
 4036     (void) force_number(denominator);
 4037     num = double_to_int(get_number_d(numerator));
 4038     denom = double_to_int(get_number_d(denominator));
 4039 
 4040     if (denom == 0.0)
 4041         fatal(_("intdiv: division by zero attempted"));
 4042 
 4043     quotient = double_to_int(num / denom);
 4044     /*
 4045      * FIXME: This code is duplicated, factor it out to a
 4046      * separate function.
 4047      */
 4048 #ifdef HAVE_FMOD
 4049     remainder = fmod(num, denom);
 4050 #else   /* ! HAVE_FMOD */
 4051     (void) modf(num / denom, & remainder);
 4052     remainder = num - remainder * denom;
 4053 #endif  /* ! HAVE_FMOD */
 4054     remainder = double_to_int(remainder);
 4055 
 4056     assoc_set(result, make_string("quotient", 8), make_number((AWKNUM) quotient));
 4057 
 4058     assoc_set(result, make_string("remainder", 9), make_number((AWKNUM) remainder));
 4059 
 4060     DEREF(denominator);
 4061     DEREF(numerator);
 4062 
 4063     return make_number((AWKNUM) 0.0);
 4064 }
 4065 #endif /* SUPPLY_INTDIV */
 4066 
 4067 /* do_typeof --- return a string with the type of the arg */
 4068 
 4069 NODE *
 4070 do_typeof(int nargs)
 4071 {
 4072     NODE *arg;
 4073     char *res = NULL;
 4074     bool deref = true;
 4075     NODE *dbg;
 4076 
 4077     if (nargs == 2) {   /* 2nd optional arg for debugging */
 4078         dbg = POP_PARAM();
 4079         if (dbg->type != Node_var_array)
 4080             fatal(_("typeof: second argument is not an array"));
 4081         assoc_clear(dbg);
 4082     }
 4083     else
 4084         dbg = NULL;
 4085     arg = POP();
 4086     switch (arg->type) {
 4087     case Node_var_array:
 4088         /* Node_var_array is never UPREF'ed */
 4089         res = "array";
 4090         deref = false;
 4091         if (dbg) {
 4092             assoc_set(dbg, make_string("array_type", 10), make_string(arg->array_funcs->name, strlen(arg->array_funcs->name)));
 4093             if (arg == PROCINFO_node) {
 4094                 int i;
 4095                 for (i = 0; i < BLOCK_MAX; i++) {
 4096                     char *p;
 4097                     size_t nl = strlen(nextfree[i].name);
 4098                     /*
 4099                      * save values before we create new
 4100                      * array elements so that we have a
 4101                      * snapshot at a consistent moment in
 4102                      * time
 4103                      */
 4104                     long hw = nextfree[i].highwater;
 4105                     long active;
 4106 #ifdef MEMDEBUG
 4107                     active = nextfree[i].active;
 4108 #else
 4109                     active = hw;
 4110                     {
 4111                         struct block_item *ip;
 4112                         for (ip = nextfree[i].freep; ip; ip = ip->freep)
 4113                             active--;
 4114                     }
 4115 #endif
 4116 
 4117 #define SETVAL(X, V) {  \
 4118     size_t l = nl + sizeof(#X); \
 4119     emalloc(p, char *, l+1, "do_typeof");   \
 4120     sprintf(p, "%s_" #X, nextfree[i].name); \
 4121     assoc_set(dbg, make_str_node(p, l, ALREADY_MALLOCED), make_number((AWKNUM) (V)));   \
 4122 }
 4123                     SETVAL(highwater, hw)
 4124                     SETVAL(active, active)
 4125 #undef SETVAL
 4126                 }
 4127             }
 4128         }
 4129         break;
 4130     case Node_val:
 4131         switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
 4132         case NUMBER:
 4133             res = "number";
 4134             break;
 4135         case NUMBER|USER_INPUT:
 4136             res = "strnum";
 4137             break;
 4138         case REGEX:
 4139             res = "regexp";
 4140             break;
 4141         case STRING:
 4142             res = "string";
 4143             // fall through
 4144         case NUMBER|STRING:
 4145             if (arg == Nnull_string || (arg->flags & NULL_FIELD) != 0) {
 4146                 res = "unassigned";
 4147                 break;
 4148             }
 4149             /* fall through */
 4150         default:
 4151             if (res == NULL) {
 4152                 warning(_("typeof detected invalid flags combination `%s'; please file a bug report."), flags2str(arg->flags));
 4153                 res = "unknown";
 4154             }
 4155             break;
 4156         }
 4157         if (dbg) {
 4158             const char *s = flags2str(arg->flags);
 4159             assoc_set(dbg, make_string("flags", 5), make_string(s, strlen(s)));
 4160         }
 4161         break;
 4162     case Node_var_new:
 4163     case Node_array_ref:
 4164         res = "untyped";
 4165         deref = false;
 4166         break;
 4167     case Node_var:
 4168         /*
 4169          * Note: this doesn't happen because the function calling code
 4170          * in interpret.h pushes Node_var->var_value.
 4171          */
 4172         fatal(_("typeof: invalid argument type `%s'"),
 4173                 nodetype2str(arg->type));
 4174         break;
 4175     default:
 4176         fatal(_("typeof: unknown argument type `%s'"),
 4177                 nodetype2str(arg->type));
 4178         break;
 4179     }
 4180 
 4181     if (deref)
 4182         DEREF(arg);
 4183     return make_string(res, strlen(res));
 4184 }
 4185 
 4186 /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
 4187 
 4188 static size_t
 4189 mbc_byte_count(const char *ptr, size_t numchars)
 4190 {
 4191     mbstate_t cur_state;
 4192     size_t sum = 0;
 4193     int mb_len;
 4194 
 4195     memset(& cur_state, 0, sizeof(cur_state));
 4196 
 4197     assert(gawk_mb_cur_max > 1);
 4198     mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
 4199     if (mb_len <= 0)
 4200         return numchars;    /* no valid m.b. char */
 4201 
 4202     for (; numchars > 0; numchars--) {
 4203         mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
 4204         if (mb_len <= 0)
 4205             break;
 4206         sum += mb_len;
 4207         ptr += mb_len;
 4208     }
 4209 
 4210     return sum;
 4211 }
 4212 
 4213 /* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
 4214 
 4215 static size_t
 4216 mbc_char_count(const char *ptr, size_t numbytes)
 4217 {
 4218     mbstate_t cur_state;
 4219     size_t sum = 0;
 4220     int mb_len;
 4221 
 4222     if (gawk_mb_cur_max == 1)
 4223         return numbytes;
 4224 
 4225     memset(& cur_state, 0, sizeof(cur_state));
 4226 
 4227     mb_len = mbrlen(ptr, numbytes, &cur_state);
 4228     if (mb_len <= 0)
 4229         return numbytes;    /* no valid m.b. char */
 4230 
 4231     while (numbytes > 0) {
 4232         mb_len = mbrlen(ptr, numbytes, &cur_state);
 4233         if (mb_len <= 0)
 4234             break;
 4235         sum++;
 4236         ptr += mb_len;
 4237         numbytes -= mb_len;
 4238     }
 4239 
 4240     return sum;
 4241 }
 4242 
 4243 /* sanitize_exit_status --- convert a 16 bit Unix exit status into something reasonable */
 4244 
 4245 int sanitize_exit_status(int status)
 4246 {
 4247     int ret = 0;
 4248 
 4249     if (WIFEXITED(status))
 4250         ret = WEXITSTATUS(status); /* normal exit */
 4251     else if (WIFSIGNALED(status)) {
 4252         bool coredumped = false;
 4253 #ifdef WCOREDUMP
 4254         coredumped = WCOREDUMP(status);
 4255 #endif
 4256         /* use 256 since exit values are 8 bits */
 4257         ret = WTERMSIG(status) + (coredumped ? 512 : 256);
 4258     } else
 4259         ret = 0;    /* shouldn't get here */
 4260 
 4261     return ret;
 4262 }
 4263 
 4264 /* out_of_range --- return true if a value is out of range */
 4265 
 4266 bool
 4267 out_of_range(NODE *n)
 4268 {
 4269 #ifdef HAVE_MPFR
 4270     if (is_mpg_integer(n))
 4271         return false;
 4272     else if (is_mpg_float(n))
 4273         return (! mpfr_number_p(n->mpg_numbr));
 4274     else
 4275 #endif
 4276         return (isnan(n->numbr) || isinf(n->numbr));
 4277 }
 4278 
 4279 /* format_nan_inf --- format NaN and INF values */
 4280 
 4281 char *
 4282 format_nan_inf(NODE *n, char format)
 4283 {
 4284     static char buf[100];
 4285 
 4286 #ifdef HAVE_MPFR
 4287     if (is_mpg_integer(n))
 4288         return NULL;
 4289     else if (is_mpg_float(n)) {
 4290         if (mpfr_nan_p(n->mpg_numbr)) {
 4291             strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-nan" : "+nan");
 4292 
 4293             goto fmt;
 4294         } else if (mpfr_inf_p(n->mpg_numbr)) {
 4295             strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-inf" : "+inf");
 4296 
 4297             goto fmt;
 4298         } else
 4299             return NULL;
 4300     }
 4301     /* else
 4302         fallthrough */
 4303 #endif
 4304     double val = n->numbr;
 4305 
 4306     if (isnan(val)) {
 4307         strcpy(buf, signbit(val) != 0 ? "-nan" : "+nan");
 4308 
 4309         // fall through to end
 4310     } else if (isinf(val)) {
 4311         strcpy(buf, val < 0 ? "-inf" : "+inf");
 4312 
 4313         // fall through to end
 4314     } else
 4315         return NULL;
 4316 
 4317 #ifdef HAVE_MPFR
 4318 fmt:
 4319 #endif
 4320     if (isupper(format)) {
 4321         int i;
 4322 
 4323         for (i = 0; buf[i] != '\0'; i++)
 4324             buf[i] = toupper(buf[i]);
 4325     }
 4326     return buf;
 4327 }