"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/main.c" (15 Mar 2020, 47926 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "main.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.0.1_vs_5.1.0.

    1 /*
    2  * main.c -- Code generator and main program for gawk.
    3  */
    4 
    5 /*
    6  * Copyright (C) 1986, 1988, 1989, 1991-2020,
    7  * the Free Software Foundation, Inc.
    8  *
    9  * This file is part of GAWK, the GNU implementation of the
   10  * AWK Programming Language.
   11  *
   12  * GAWK is free software; you can redistribute it and/or modify
   13  * it under the terms of the GNU General Public License as published by
   14  * the Free Software Foundation; either version 3 of the License, or
   15  * (at your option) any later version.
   16  *
   17  * GAWK is distributed in the hope that it will be useful,
   18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   20  * GNU General Public License for more details.
   21  *
   22  * You should have received a copy of the GNU General Public License
   23  * along with this program; if not, write to the Free Software
   24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
   25  */
   26 
   27 /* FIX THIS BEFORE EVERY RELEASE: */
   28 #define UPDATE_YEAR 2020
   29 
   30 #include "awk.h"
   31 #include "getopt.h"
   32 
   33 #ifdef HAVE_MCHECK_H
   34 #include <mcheck.h>
   35 #endif
   36 
   37 #ifdef HAVE_LIBSIGSEGV
   38 #include <sigsegv.h>
   39 #else   /* no libsigsegv: supply stand-ins for the names main() uses */
   40 typedef void *stackoverflow_context_t;
   41 /* the argument to this macro is purposely not used; catchsig is installed through signal() instead */
   42 #define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
   43 /* define as 0 rather than empty so that (void) cast on it works */
   44 #define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
   45 #endif  /* HAVE_LIBSIGSEGV */
   46 
   47 #define DEFAULT_PROFILE     "awkprof.out"   /* where to put profile */
   48 #define DEFAULT_VARFILE     "awkvars.out"   /* where to put vars */
   49 #define DEFAULT_PREC        53      /* passed to init_mpfr() and used as the initial PREC */
   50 #define DEFAULT_ROUNDMODE   "N"     /* round to nearest */
   51 
   52 static const char *varfile = DEFAULT_VARFILE;   /* file name handed to dump_vars() */
   53 const char *command_file = NULL;    /* debugger commands */
   54 /* Forward declarations of the file-local helpers. */
   55 static void usage(int exitval, FILE *fp) ATTRIBUTE_NORETURN;
   56 static void copyleft(void) ATTRIBUTE_NORETURN;
   57 static void cmdline_fs(char *str);
   58 static void init_args(int argc0, int argc, const char *argv0, char **argv);
   59 static void init_vars(void);
   60 static NODE *load_environ(void);
   61 static NODE *load_procinfo(void);
   62 static void catchsig(int sig);
   63 #ifdef HAVE_LIBSIGSEGV  /* these two handlers are only declared when libsigsegv is available */
   64 static int catchsegv(void *fault_address, int serious);
   65 static void catchstackoverflow(int emergency, stackoverflow_context_t scp);
   66 #endif
   67 static void nostalgia(void) ATTRIBUTE_NORETURN;
   68 static void version(void) ATTRIBUTE_NORETURN;
   69 static void init_fds(void);
   70 static void init_groupset(void);
   71 static void save_argv(int, char **);
   72 static const char *platform_name();
   73 
   74 /* These nodes store all the special variables AWK uses; most are filled in from the varinit[] table by init_vars() */
   75 NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node;
   76 NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node;
   77 NODE *FNR_node, *FPAT_node, *FS_node, *IGNORECASE_node, *LINT_node;
   78 NODE *NF_node, *NR_node, *OFMT_node, *OFS_node, *ORS_node, *PROCINFO_node;
   79 NODE *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
   80 NODE *PREC_node, *ROUNDMODE_node;
   81 NODE *TEXTDOMAIN_node;
   82 /* NOTE(review): presumably C-level copies of the same-named AWK variables; the code that keeps them in sync is not in this part of the file */
   83 long NF;
   84 long NR;
   85 long FNR;
   86 int BINMODE;    /* tested in main() against BINMODE_INPUT and BINMODE_OUTPUT */
   87 bool IGNORECASE;
   88 char *OFS;
   89 char *ORS;
   90 char *OFMT;
   91 char *TEXTDOMAIN;
   92 
   93 /*
   94  * CONVFMT is a convenience pointer for the current number to string format.
   95  * We must supply an initial value to avoid recursion problems of
   96  *  set_CONVFMT -> fmt_index -> force_string: gets NULL CONVFMT
   97  * Fun, fun, fun, fun.
   98  */
   99 char *CONVFMT = "%.6g";     /* same text as the CONVFMT entry in varinit[] */
  100 
  101 NODE *Nnull_string;     /* The global null string; created and flagged in main() */
  102 
  103 #if defined(HAVE_LOCALE_H)
  104 struct lconv loc;       /* current locale */
  105 static void init_locale(struct lconv *l);
  106 #endif /* defined(HAVE_LOCALE_H) */
  107 
  108 /* The name the program was invoked under, for error messages; set from gawk_name(argv[0]) in main() */
  109 const char *myname;
  110 
  111 /* A block of AWK code to be run; filled in by parse_program() in main() */
  112 INSTRUCTION *code_block = NULL;
  113 
  114 char **d_argv;          /* saved argv for debugger restarting */
  115 /*
  116  * List of rules and functions with first and last instruction (source_line)
  117  * information; used for profiling and debugging.
  118  */
  119 INSTRUCTION *rule_list;
  120 
  121 int exit_val = EXIT_SUCCESS;        /* exit value; passed to final_exit() at the end of main() */
  122 
  123 #if defined(YYDEBUG) || defined(GAWKDEBUG)
  124 extern int yydebug;
  125 #endif
  126 
  127 SRCFILE *srcfiles; /* source files; main() walks this as a circular list starting at srcfiles->next */
  128 
  129 /*
  130  * structure to remember variable pre-assignments
  131  */
  132 struct pre_assign {
  133     enum assign_type { PRE_ASSIGN = 1, PRE_ASSIGN_FS } type;   /* main() sends PRE_ASSIGN to arg_assign(), anything else to cmdline_fs() */
  134     char *val;      /* estrdup()ed copy of the option argument; freed by main() */
  135 };
  136 
  137 static struct pre_assign *preassigns = NULL;    /* requested via -v or -F */
  138 static long numassigns = -1;            /* index of the last entry in use; -1 means there are none */
  139 
  140 static bool disallow_var_assigns = false;   /* true for --exec */
  141 
  142 static void add_preassign(enum assign_type type, char *val);
  143 
  144 static void parse_args(int argc, char **argv);
  145 static void set_locale_stuff(void);
  146 static bool stopped_early = false;      /* when true, main() refuses to take the program text from argv[optind] */
  147 
  148 int do_flags = false;           /* bit set of DO_* flags, e.g. DO_TIDY_MEM, DO_POSIX, DO_TRADITIONAL */
  149 bool do_optimize = true;        /* apply default optimizations */
  150 static int do_nostalgia = false;    /* provide a blast from the past */
  151 static int do_binary = false;       /* hands off my data! */
  152 static int do_version = false;      /* print version info */
  153 static const char *locale = "";     /* default value to setlocale */
  154 static char *locale_dir = LOCALEDIR;    /* default locale dir; main() replaces it with $GAWK_LOCALE_DIR when set */
  155 
  156 int use_lc_numeric = false; /* obey locale for decimal point */
  157 
  158 int gawk_mb_cur_max;        /* MB_CUR_MAX value, see comment in main() */
  159 
  160 FILE *output_fp;        /* default gawk output, can be redirected in the debugger */
  161 bool output_is_tty = false; /* control flushing of output */
  162 
  163 /* default format for strftime(), available via PROCINFO */
  164 const char def_strftime_format[] = "%a %b %e %H:%M:%S %Z %Y";
  165 
  166 extern const char *version_string;
  167 
  168 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
  169 GETGROUPS_T *groupset;      /* current group set */
  170 int ngroups;            /* size of said set */
  171 #endif
  172 /* Lint messages go through this pointer; it starts out aimed at r_warning. */
  173 void (*lintfunc)(const char *mesg, ...) = r_warning;
  174 
  175 /* Sorted by long option name!  Entries are { name, has_arg, flag, val } as getopt_long() expects. */
  176 static const struct option optab[] = {
  177     { "assign",     required_argument,  NULL,   'v' },
  178     { "bignum",     no_argument,        NULL,   'M' },
  179     { "characters-as-bytes", no_argument,       & do_binary,     'b' },    /* non-NULL flag: getopt_long() stores val in do_binary */
  180     { "copyright",      no_argument,        NULL,   'C' },
  181     { "debug",      optional_argument,  NULL,   'D' },
  182     { "dump-variables", optional_argument,  NULL,   'd' },
  183     { "exec",       required_argument,  NULL,   'E' },
  184     { "field-separator",    required_argument,  NULL,   'F' },
  185     { "file",       required_argument,  NULL,   'f' },
  186     { "gen-pot",        no_argument,        NULL,   'g' },
  187     { "help",       no_argument,        NULL,   'h' },
  188     { "include",        required_argument,  NULL,   'i' },
  189     { "lint",       optional_argument,  NULL,   'L' },
  190     { "lint-old",       no_argument,        NULL,   't' },
  191     { "load",       required_argument,  NULL,   'l' },
  192 #if defined(LOCALEDEBUG)    /* --locale exists only in LOCALEDEBUG builds */
  193     { "locale",     required_argument,  NULL,   'Z' },
  194 #endif
  195     { "non-decimal-data",   no_argument,        NULL,   'n' },
  196     { "no-optimize",    no_argument,        NULL,   's' },
  197     { "nostalgia",      no_argument,        & do_nostalgia, 1 },
  198     { "optimize",       no_argument,        NULL,   'O' },
  199 #if defined(YYDEBUG) || defined(GAWKDEBUG)  /* --parsedebug exists only in parser-debugging builds */
  200     { "parsedebug",     no_argument,        NULL,   'Y' },
  201 #endif
  202     { "posix",      no_argument,        NULL,   'P' },
  203     { "pretty-print",   optional_argument,  NULL,   'o' },
  204     { "profile",        optional_argument,  NULL,   'p' },
  205     { "re-interval",    no_argument,        NULL,   'r' },
  206     { "sandbox",        no_argument,        NULL,   'S' },
  207     { "source",     required_argument,  NULL,   'e' },
  208     { "traditional",    no_argument,        NULL,   'c' },
  209     { "use-lc-numeric", no_argument,        & use_lc_numeric, 1 },
  210     { "version",        no_argument,        & do_version, 'V' },
  211     { NULL, 0, NULL, '\0' }     /* terminator */
  212 };
  213 
  214 /* main --- process args, parse program, run it, clean up */
  215 /* Ends by calling final_exit(exit_val); the return statement after it only quiets compiler warnings. */
  216 int
  217 main(int argc, char **argv)
  218 {
  219     int i;                  /* index into preassigns[] */
  220     char *extra_stack;      /* buffer handed to stackoverflow_install_handler() */
  221     int have_srcfile = 0;   /* number of srcfiles entries that are neither SRC_EXTLIB nor SRC_INC */
  222     SRCFILE *s;
  223     char *cp;               /* getenv() result */
  224 #if defined(LOCALEDEBUG)
  225     const char *initial_locale;
  226 #endif
  227 
  228     /* do these checks early */
  229     if (getenv("TIDYMEM") != NULL)
  230         do_flags |= DO_TIDY_MEM;
  231 
  232 #ifdef HAVE_MCHECK_H
  233 #ifdef HAVE_MTRACE
  234     if (do_tidy_mem)
  235         mtrace();
  236 #endif /* HAVE_MTRACE */
  237 #endif /* HAVE_MCHECK_H */
  238 
  239     myname = gawk_name(argv[0]);
  240     os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
  241 
  242     if (argc < 2)   /* nothing besides the program name: print usage and fail */
  243         usage(EXIT_FAILURE, stderr);
  244     /* GAWK_LOCALE_DIR in the environment overrides the compiled-in LOCALEDIR */
  245     if ((cp = getenv("GAWK_LOCALE_DIR")) != NULL)
  246         locale_dir = cp;
  247 
  248 #if defined(F_GETFL) && defined(O_APPEND)
  249     // 1/2018: This is needed on modern BSD systems so that the
  250     // inplace tests pass. I think it's a bug in those kernels
  251     // but let's just work around it anyway.
  252     int flags = fcntl(fileno(stderr), F_GETFL, NULL);
  253     if (flags >= 0 && (flags & O_APPEND) == 0) {
  254         flags |= O_APPEND;
  255         (void) fcntl(fileno(stderr), F_SETFL, flags);
  256     }
  257 #endif
  258 
  259 #if defined(LOCALEDEBUG)
  260     initial_locale = locale;    /* remembered so a change made by parse_args() can be detected below */
  261 #endif
  262     set_locale_stuff();
  263     /* route SIGFPE (and SIGBUS where it exists) to catchsig */
  264     (void) signal(SIGFPE, catchsig);
  265 #ifdef SIGBUS
  266     (void) signal(SIGBUS, catchsig);
  267 #endif
  268 
  269     /*
  270      * Ignore SIGPIPE so that writes to pipes that fail don't
  271      * kill the process but instead return -1 and set errno.
  272      * That lets us print a fatal message instead of dieing suddenly.
  273      *
  274      * Note that this requires ignoring EPIPE when writing and
  275      * flushing stdout/stderr in other parts of the program. E.g.,
  276      *
  277      *  gawk 'BEGIN { print "hi" }' | exit
  278      *
  279      * should not give us "broken pipe" messages --- mainly because
  280      * it did not do so in the past and people would complain.
  281      */
  282     ignore_sigpipe();
  283     /* without libsigsegv these two calls expand to signal(SIGSEGV, catchsig) and to 0 */
  284     (void) sigsegv_install_handler(catchsegv);
  285 #define STACK_SIZE (16*1024)
  286     emalloc(extra_stack, char *, STACK_SIZE, "main");
  287     (void) stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE);
  288 #undef STACK_SIZE
  289 
  290     /* initialize the null string; its numeric value and flags are set further down */
  291     Nnull_string = make_string("", 0);
  292 
  293     /* Robustness: check that file descriptors 0, 1, 2 are open */
  294     init_fds();
  295 
  296     /* init array handling. */
  297     array_init();
  298 
  299     /* init the symbol tables */
  300     init_symbol_table();
  301 
  302     output_fp = stdout;
  303 
  304     /* initialize global (main) execution context */
  305     push_context(new_context());
  306 
  307     parse_args(argc, argv);
  308 
  309 #if defined(LOCALEDEBUG)
  310     if (locale != initial_locale)   /* pointer comparison: redo the locale setup if locale now points elsewhere */
  311         set_locale_stuff();
  312 #endif
  313 
  314     /*
  315      * In glibc, MB_CUR_MAX is actually a function.  This value is
  316      * tested *a lot* in many speed-critical places in gawk. Caching
  317      * this value once makes a speed difference.
  318      */
  319     gawk_mb_cur_max = MB_CUR_MAX;
  320 
  321     /* init the cache for checking bytes if they're characters */
  322     init_btowc_cache();
  323 
  324     /* set up the single byte case table */
  325     if (gawk_mb_cur_max == 1)
  326         load_casetable();
  327 
  328     if (do_nostalgia)   /* nostalgia() is declared ATTRIBUTE_NORETURN */
  329         nostalgia();
  330 
  331     /* check for POSIXLY_CORRECT environment variable */
  332     if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
  333         do_flags |= DO_POSIX;
  334         if (do_lint)
  335             lintwarn(
  336     _("environment variable `POSIXLY_CORRECT' set: turning on `--posix'"));
  337     }
  338 
  339     // Checks for conflicting command-line arguments.
  340     if (do_posix) {
  341         use_lc_numeric = true;
  342         if (do_traditional) /* both on command line */
  343             warning(_("`--posix' overrides `--traditional'"));
  344         else
  345             do_flags |= DO_TRADITIONAL;
  346             /*
  347              * POSIX compliance also implies
  348              * no GNU extensions either.
  349              */
  350     }
  351 
  352     if (do_traditional && do_non_decimal_data) {
  353         do_flags &= ~DO_NON_DEC_DATA;
  354         warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'"));
  355     }
  356 
  357     if (do_binary) {
  358         if (do_posix)
  359             warning(_("`--posix' overrides `--characters-as-bytes'"));
  360         else
  361             gawk_mb_cur_max = 1;    /* hands off my data! */
  362 #if defined(LC_ALL)     /* NOTE: not part of the else above; this runs whenever do_binary is set */
  363         setlocale(LC_ALL, "C");
  364 #endif
  365     }
  366 
  367     if (do_lint && os_is_setuid())
  368         lintwarn(_("running %s setuid root may be a security problem"), myname);
  369 
  370     if (do_debug)   /* Need to register the debugger pre-exec hook before any other */
  371         init_debug();
  372 
  373 #ifdef HAVE_MPFR
  374     /* Set up MPFR defaults, and register pre-exec hook to process arithmetic opcodes */
  375     if (do_mpfr)
  376         init_mpfr(DEFAULT_PREC, DEFAULT_ROUNDMODE);
  377 #endif
  378 
  379     /* load group set */
  380     init_groupset();
  381     /* give the null string a numeric value of zero in whichever number representation is active */
  382 #ifdef HAVE_MPFR
  383     if (do_mpfr) {
  384         mpz_init(Nnull_string->mpg_i);
  385         Nnull_string->flags = (MALLOC|STRCUR|STRING|MPZN|NUMCUR|NUMBER);
  386     } else
  387 #endif
  388     {
  389         Nnull_string->numbr = 0.0;
  390         Nnull_string->flags = (MALLOC|STRCUR|STRING|NUMCUR|NUMBER);
  391     }
  392 
  393     /*
  394      * Tell the regex routines how they should work.
  395      * Do this before initializing variables, since
  396      * they could want to do a regexp compile.
  397      */
  398     resetup();
  399 
  400     /* Set up the special variables */
  401     init_vars();
  402 
  403     /* Set up the field variables */
  404     init_fields();
  405 
  406     /* Now process the pre-assignments (numassigns starts at -1, so the loop is skipped when there are none) */
  407     int dash_v_errs = 0;    // bad stuff for -v
  408     for (i = 0; i <= numassigns; i++) {
  409         if (preassigns[i].type == PRE_ASSIGN)
  410             dash_v_errs += (arg_assign(preassigns[i].val, true) == false);  // count each failed assignment
  411         else    /* PRE_ASSIGN_FS */
  412             cmdline_fs(preassigns[i].val);
  413         efree(preassigns[i].val);   /* the copy made by estrdup() in add_preassign() */
  414     }
  415 
  416     if (preassigns != NULL)
  417         efree(preassigns);
  418 
  419     if ((BINMODE & BINMODE_INPUT) != 0)
  420         if (os_setbinmode(fileno(stdin), O_BINARY) == -1)
  421             fatal(_("cannot set binary mode on stdin: %s"), strerror(errno));
  422     if ((BINMODE & BINMODE_OUTPUT) != 0) {
  423         if (os_setbinmode(fileno(stdout), O_BINARY) == -1)
  424             fatal(_("cannot set binary mode on stdout: %s"), strerror(errno));
  425         if (os_setbinmode(fileno(stderr), O_BINARY) == -1)
  426             fatal(_("cannot set binary mode on stderr: %s"), strerror(errno));
  427     }
  428 
  429 #ifdef GAWKDEBUG
  430     setbuf(stdout, (char *) NULL);  /* make debugging easier */
  431 #endif
  432     if (os_isatty(fileno(stdout)))
  433         output_is_tty = true;
  434 
  435     /* initialize API before loading extension libraries */
  436     init_ext_api();
  437 
  438     /* load extension libs, and count the entries that are program source (not SRC_EXTLIB, not SRC_INC) */
  439     for (s = srcfiles->next; s != srcfiles; s = s->next) {
  440         if (s->stype == SRC_EXTLIB)
  441             load_ext(s->fullpath);
  442         else if (s->stype != SRC_INC)
  443             have_srcfile++;
  444     }
  445 
  446     /* do version check after extensions are loaded to get extension info */
  447     if (do_version)     /* version() is declared ATTRIBUTE_NORETURN */
  448         version();
  449 
  450     /* No -f or --source options, use next arg */
  451     if (! have_srcfile) {
  452         if (optind > argc - 1 || stopped_early) /* no args left or no program */
  453             usage(EXIT_FAILURE, stderr);
  454         (void) add_srcfile(SRC_CMDLINE, argv[optind], srcfiles, NULL, NULL);
  455         optind++;   /* the program text is consumed; the rest become ARGV elements */
  456     }
  457 
  458     /* Select the interpreter routine */
  459     init_interpret();
  460     /* ARGV[0] is the unmodified argv[0] in POSIX mode, otherwise myname */
  461     init_args(optind, argc,
  462             do_posix ? argv[0] : myname,
  463             argv);
  464 
  465 #if defined(LC_NUMERIC)
  466     /*
  467      * FRAGILE!  CAREFUL!
  468      * Pre-initing the variables with arg_assign() can change the
  469      * locale.  Force it to C before parsing the program.
  470      */
  471     setlocale(LC_NUMERIC, "C");
  472 #endif
  473     /* Read in the program; a failed -v assignment counted above is also fatal here */
  474     if (parse_program(& code_block, false) != 0 || dash_v_errs > 0)
  475         exit(EXIT_FAILURE);
  476 
  477     if (do_intl)
  478         exit(EXIT_SUCCESS);
  479 
  480     set_current_namespace(awk_namespace);
  481 
  482     install_builtins();
  483 
  484     if (do_lint)
  485         shadow_funcs();
  486 
  487     if (do_lint && code_block->nexti->opcode == Op_atexit)
  488         lintwarn(_("no program text at all!"));
  489 
  490     load_symbols();
  491 
  492     if (do_profile)
  493         init_profiling_signals();
  494 
  495 #if defined(LC_NUMERIC)
  496     /*
  497      * See comment above about using locale's decimal point.
  498      *
  499      * 10/2005:
  500      * Bitter experience teaches us that most people the world over
  501      * use period as the decimal point, not whatever their locale
  502      * uses.  Thus, only use the locale's decimal point if being
  503      * posixly anal-retentive.
  504      *
  505      * 7/2007:
  506      * Be a little bit kinder. Allow the --use-lc-numeric option
  507      * to also use the local decimal point. This avoids the draconian
  508      * strictness of POSIX mode if someone just wants to parse their
  509      * data using the local decimal point.
  510      */
  511     if (use_lc_numeric)     /* also forced on above when do_posix is set */
  512         setlocale(LC_NUMERIC, locale);
  513 #endif
  514 
  515     init_io();
  516     output_fp = stdout;     /* assigned again here; it was first set before parse_args() */
  517 
  518     if (do_debug)
  519         debug_prog(code_block);
  520     else if (do_pretty_print && ! do_profile)
  521         ;   /* run pretty printer only. */
  522     else
  523         interpret(code_block);
  524 
  525     if (do_pretty_print) {
  526         set_current_namespace(awk_namespace);
  527         dump_prog(code_block);
  528         dump_funcs();
  529     }
  530 
  531     if (do_dump_vars)
  532         dump_vars(varfile);
  533 
  534 #ifdef HAVE_MPFR
  535     if (do_mpfr)
  536         cleanup_mpfr();
  537 #endif
  538 
  539     if (do_tidy_mem)
  540         release_all_vars();
  541 
  542     /* keep valgrind happier */
  543     if (extra_stack)
  544         efree(extra_stack);
  545 
  546     final_exit(exit_val);
  547     return exit_val;    /* to suppress warnings */
  548 }
  549 
  550 /* add_preassign --- append a copy of val, tagged with type, to preassigns[] */
  551 /* The array starts at INIT_SRC slots and doubles when full; numassigns is the index of the entry just stored. */
  552 static void
  553 add_preassign(enum assign_type type, char *val)
  554 {
  555     static long alloc_assigns;      /* for how many are allocated */
  556 
  557 #define INIT_SRC 4
  558 
  559     ++numassigns;   /* starts at -1, so the first entry lands in slot 0 */
  560 
  561     if (preassigns == NULL) {
  562         emalloc(preassigns, struct pre_assign *,
  563             INIT_SRC * sizeof(struct pre_assign), "add_preassign");
  564         alloc_assigns = INIT_SRC;
  565     } else if (numassigns >= alloc_assigns) {
  566         alloc_assigns *= 2;
  567         erealloc(preassigns, struct pre_assign *,
  568             alloc_assigns * sizeof(struct pre_assign), "add_preassign");   /* label now matches the function name, as in the emalloc() call */
  569     }
  570     preassigns[numassigns].type = type;
  571     preassigns[numassigns].val = estrdup(val, strlen(val));    /* private copy; main() frees it after use */
  572 
  573 #undef INIT_SRC
  574 }
  575 
  576 /* usage --- print usage information on fp and exit; never returns */
  577 /* Exits with exitval unless writing to fp failed; then it dies via die_via_sigpipe() on EPIPE or exits with EXIT_FAILURE. */
  578 static void
  579 usage(int exitval, FILE *fp)
  580 {
  581     /* Not factoring out common stuff makes it easier to translate. */
  582     fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"),
  583         myname);
  584     fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"),
  585             myname, quote, quote);
  586 
  587     /* GNU long options info. This is too many options. */
  588 
  589     fputs(_("POSIX options:\t\tGNU long options: (standard)\n"), fp);
  590     fputs(_("\t-f progfile\t\t--file=progfile\n"), fp);
  591     fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp);
  592     fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp);
  593     fputs(_("Short options:\t\tGNU long options: (extensions)\n"), fp);
  594     fputs(_("\t-b\t\t\t--characters-as-bytes\n"), fp);
  595     fputs(_("\t-c\t\t\t--traditional\n"), fp);
  596     fputs(_("\t-C\t\t\t--copyright\n"), fp);
  597     fputs(_("\t-d[file]\t\t--dump-variables[=file]\n"), fp);
  598     fputs(_("\t-D[file]\t\t--debug[=file]\n"), fp);
  599     fputs(_("\t-e 'program-text'\t--source='program-text'\n"), fp);
  600     fputs(_("\t-E file\t\t\t--exec=file\n"), fp);
  601     fputs(_("\t-g\t\t\t--gen-pot\n"), fp);
  602     fputs(_("\t-h\t\t\t--help\n"), fp);
  603     fputs(_("\t-i includefile\t\t--include=includefile\n"), fp);
  604     fputs(_("\t-l library\t\t--load=library\n"), fp);
  605     /*
  606      * TRANSLATORS: the "fatal", "invalid" and "no-ext" here are literal
  607      * values, they should not be translated. Thanks.
  608      */
  609     fputs(_("\t-L[fatal|invalid|no-ext]\t--lint[=fatal|invalid|no-ext]\n"), fp);
  610     fputs(_("\t-M\t\t\t--bignum\n"), fp);
  611     fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp);
  612     fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
  613     fputs(_("\t-o[file]\t\t--pretty-print[=file]\n"), fp);
  614     fputs(_("\t-O\t\t\t--optimize\n"), fp);
  615     fputs(_("\t-p[file]\t\t--profile[=file]\n"), fp);
  616     fputs(_("\t-P\t\t\t--posix\n"), fp);
  617     fputs(_("\t-r\t\t\t--re-interval\n"), fp);
  618     fputs(_("\t-s\t\t\t--no-optimize\n"), fp);
  619     fputs(_("\t-S\t\t\t--sandbox\n"), fp);
  620     fputs(_("\t-t\t\t\t--lint-old\n"), fp);
  621     fputs(_("\t-V\t\t\t--version\n"), fp);
  622 #ifdef NOSTALGIA
  623     fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp);
  624 #endif
  625 #if defined(YYDEBUG) || defined(GAWKDEBUG)  /* same guard as the "parsedebug" entry in optab[] */
  626     fputs(_("\t-Y\t\t\t--parsedebug\n"), fp);
  627 #endif
  628 #if defined(LOCALEDEBUG)    /* same guard as the "locale" entry in optab[] */
  629     fputs(_("\t-Z locale-name\t\t--locale=locale-name\n"), fp);
  630 #endif
  631 
  632     /* This is one string to make things easier on translators. */
  633     /* TRANSLATORS: --help output (end)
  634        no-wrap */
  635     fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info'\n\
  636 which is section `Reporting Problems and Bugs' in the\n\
  637 printed version.  This same information may be found at\n\
  638 https://www.gnu.org/software/gawk/manual/html_node/Bugs.html.\n\
  639 PLEASE do NOT try to report bugs by posting in comp.lang.awk,\n\
  640 or by using a web forum such as Stack Overflow.\n\n"), fp);
  641 
  642     /* ditto */
  643     fputs(_("gawk is a pattern scanning and processing language.\n\
  644 By default it reads standard input and writes standard output.\n\n"), fp);
  645 
  646     /* ditto */
  647     fprintf(fp, _("Examples:\n\t%s '{ sum += $1 }; END { print sum }' file\n\
  648 \t%s -F: '{ print $1 }' /etc/passwd\n"), myname, myname);
  649 
  650     fflush(fp);     /* flush now so that a write failure shows up in ferror() below */
  651 
  652     if (ferror(fp)) {
  653 #ifdef __MINGW32__
  654         if (errno == 0 || errno == EINVAL)
  655             w32_maybe_set_errno();
  656 #endif
  657         /* don't warn about stdout/stderr if EPIPE, but do error exit */
  658         if (errno == EPIPE)
  659             die_via_sigpipe();
  660 
  661         if (fp == stdout)
  662             warning(_("error writing standard output: %s"), strerror(errno));
  663         else if (fp == stderr)
  664             warning(_("error writing standard error: %s"), strerror(errno));
  665 
  666         // some other problem than SIGPIPE
  667         exit(EXIT_FAILURE);
  668     }
  669 
  670     exit(exitval);
  671 }
  672 
  673 /* copyleft --- write the short GNU copyright notice to stdout, then exit */
  674 
  675 static void
  676 copyleft()
  677 {
  678     static const char notice_terms[] =
  679       N_("Copyright (C) 1989, 1991-%d Free Software Foundation.\n\
  680 \n\
  681 This program is free software; you can redistribute it and/or modify\n\
  682 it under the terms of the GNU General Public License as published by\n\
  683 the Free Software Foundation; either version 3 of the License, or\n\
  684 (at your option) any later version.\n\
  685 \n");
  686     static const char notice_warranty[] =
  687       N_("This program is distributed in the hope that it will be useful,\n\
  688 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
  689 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
  690 GNU General Public License for more details.\n\
  691 \n");
  692     static const char notice_copy[] =
  693       N_("You should have received a copy of the GNU General Public License\n\
  694 along with this program. If not, see http://www.gnu.org/licenses/.\n");
  695 
  696     /* the notice is kept in several pieces for the benefit of limited compilers */
  697     printf(_(notice_terms), UPDATE_YEAR);   /* fills in the %d of the copyright range */
  698     fputs(_(notice_warranty), stdout);
  699     fputs(_(notice_copy), stdout);
  700     fflush(stdout);
  701 
  702     if (! ferror(stdout))   /* everything was written: normal exit */
  703         exit(EXIT_SUCCESS);
  704 
  705 #ifdef __MINGW32__
  706     if (errno == 0 || errno == EINVAL)
  707         w32_maybe_set_errno();
  708 #endif
  709     /* a broken pipe fails quietly; any other write error is reported first */
  710     if (errno == EPIPE)
  711         exit(EXIT_FAILURE);
  712     warning(_("error writing standard output: %s"), strerror(errno));
  713     exit(EXIT_FAILURE);
  714 }
  715 
  716 /* cmdline_fs --- install a -F argument as the value of FS */
  717 
  718 static void
  719 cmdline_fs(char *str)
  720 {
  721     NODE **fs_value = &FS_node->var_value;
  722     const bool lone_t = (str[0] == 't' && str[1] == '\0');
  723 
  724     unref(*fs_value);   /* release the old value before it is replaced below */
  725 
  726     /*
  727      * The shell hands us "-Ft" when the user types -F\t, and the awk
  728      * book documents that as meaning a tab.  Honor this stupid special
  729      * case only in full compatibility mode (--traditional without
  730      * --posix); thankfully, POSIX did not propagate the "feature".
  731      * str is changed in place.
  732      */
  733     if (lone_t && do_lint)
  734         lintwarn(_("-Ft does not set FS to tab in POSIX awk"));
  735     if (lone_t && do_traditional && ! do_posix)
  736         str[0] = '\t';
  737 
  738     /* SCAN: do process escapes */
  739     *fs_value = make_str_node(str, strlen(str), SCAN);
  740     set_FS();
  741 }
  742 
  743 /* init_args --- build ARGV and ARGC from argv0 plus argv[argc0 .. argc-1] */
  744 
  745 static void
  746 init_args(int argc0, int argc, const char *argv0, char **argv)
  747 {
  748     NODE *subscript, *element;
  749     NODE *sandbox_args = NULL;  /* shadow array, built only when do_sandbox is set */
  750     const char *word = argv0;   /* ARGV[0] comes from argv0, not from argv[] */
  751     int next_arg = argc0;       /* next argv[] slot to copy */
  752     int count = 0;              /* elements stored so far; becomes ARGC */
  753 
  754     ARGV_node = install_symbol(estrdup("ARGV", 4), Node_var_array);
  755 
  756     for (;;) {
  757         /* ARGV[count] = word, flagged as user input */
  758         subscript = make_number((AWKNUM) count);
  759         element = make_string(word, strlen(word));
  760         element->flags |= USER_INPUT;
  761         assoc_set(ARGV_node, subscript, element);
  762 
  763         /* in sandbox mode, also record word as a key of the shadow array */
  764         if (do_sandbox) {
  765             if (sandbox_args == NULL)
  766                 sandbox_args = make_array();
  767             subscript = make_string(word, strlen(word));
  768             element = make_number(0.0);
  769             assoc_set(sandbox_args, subscript, element);
  770         }
  771 
  772         count++;
  773         if (next_arg >= argc)
  774             break;
  775         word = argv[next_arg++];
  776     }
  777 
  778     ARGC_node = install_symbol(estrdup("ARGC", 4), Node_var);
  779     ARGC_node->var_value = make_number((AWKNUM) count);
  780 
  781     /* pass ARGV and its shadow to init_argv_array() (defined outside this part of the file) */
  782     if (do_sandbox)
  783         init_argv_array(ARGV_node, sandbox_args);
  784 }
  785 
  786 
  787 /*
  788  * Set all the special variables to their initial values.
  789  * Note that some of the variables that have set_FOO routines should
  790  * *N*O*T* have those routines called upon initialization, and thus
  791  * they have NULL entries in that field. This is notably true of FS
  792  * and IGNORECASE.
  793  */
  794 /* One row of the table that init_vars() walks; the row whose name is NULL ends the table. */
  795 struct varinit {
  796     NODE **spec;            /* where init_vars() stores the installed symbol; NULL in the NO_INSTALL rows */
  797     const char *name;       /* the variable's AWK name */
  798     const char *strval;     /* initial string value; when NULL, numval is used instead */
  799     AWKNUM numval;          /* initial numeric value, used only when strval is NULL */
  800     Func_ptr update;        /* NOTE(review): presumably refreshes the NODE from the C-level value (update_NR etc.); its use is not visible here */
  801     Func_ptr assign;        /* the set_FOO routine for the variable, if any */
  802     bool do_assign;         /* NOTE(review): presumably whether init_vars() calls assign at startup (see the comment above); confirm */
  803     int flags;              /* bitwise OR of the values below */
  804 #define NO_INSTALL  0x01    /* init_vars() skips these rows */
  805 #define NON_STANDARD    0x02
  806 #define NOT_OFF_LIMITS  0x04    /* may be accessed by extension function */
  807 };
  808 
  809 static const struct varinit varinit[] = {
  810 {NULL,      "ARGC",     NULL,   0,  NULL, NULL, false, NO_INSTALL },
  811 {&ARGIND_node,  "ARGIND",   NULL,   0,  NULL, NULL, false, NON_STANDARD },
  812 {NULL,      "ARGV",     NULL,   0,  NULL, NULL, false, NO_INSTALL },
  813 {&BINMODE_node, "BINMODE",  NULL,   0,  NULL, set_BINMODE,  false, NON_STANDARD },
  814 {&CONVFMT_node, "CONVFMT",  "%.6g", 0,  NULL, set_CONVFMT,true,     0 },
  815 {NULL,      "ENVIRON",  NULL,   0,  NULL, NULL, false, NO_INSTALL },
  816 {&ERRNO_node,   "ERRNO",    "", 0,  NULL, NULL, false, NON_STANDARD },
  817 {&FIELDWIDTHS_node, "FIELDWIDTHS", "",  0,  NULL, set_FIELDWIDTHS,  false, NON_STANDARD },
  818 {&FILENAME_node, "FILENAME",    "", 0,  NULL, NULL, false, 0 },
  819 {&FNR_node, "FNR",      NULL,   0,  update_FNR, set_FNR,    true, 0 },
  820 {&FS_node,  "FS",       " ",    0,  NULL, set_FS,   false, 0 },
  821 {&FPAT_node,    "FPAT",     "[^[:space:]]+", 0,  NULL, set_FPAT,    false, NON_STANDARD },
  822 {&IGNORECASE_node, "IGNORECASE", NULL,  0,  NULL, set_IGNORECASE,   false, NON_STANDARD },
  823 {&LINT_node,    "LINT",     NULL,   0,  NULL, set_LINT, false, NON_STANDARD },
  824 {&PREC_node,    "PREC",     NULL,   DEFAULT_PREC,   NULL,   set_PREC,   false,  NON_STANDARD},
  825 {&NF_node,  "NF",       NULL,   -1, update_NF, set_NF,  false, 0 },
  826 {&NR_node,  "NR",       NULL,   0,  update_NR, set_NR,  true, 0 },
  827 {&OFMT_node,    "OFMT",     "%.6g", 0,  NULL, set_OFMT, true, 0 },
  828 {&OFS_node, "OFS",      " ",    0,  NULL, set_OFS,  true, 0 },
  829 {&ORS_node, "ORS",      "\n",   0,  NULL, set_ORS,  true, 0 },
  830 {NULL,      "PROCINFO", NULL,   0,  NULL, NULL, false, NO_INSTALL | NON_STANDARD | NOT_OFF_LIMITS },
  831 {&RLENGTH_node, "RLENGTH",  NULL,   0,  NULL, NULL, false, 0 },
  832 {&ROUNDMODE_node, "ROUNDMODE",  DEFAULT_ROUNDMODE,  0,  NULL, set_ROUNDMODE,    false, NON_STANDARD },
  833 {&RS_node,  "RS",       "\n",   0,  NULL, set_RS,   true, 0 },
  834 {&RSTART_node,  "RSTART",   NULL,   0,  NULL, NULL, false, 0 },
  835 {&RT_node,  "RT",       "", 0,  NULL, NULL, false, NON_STANDARD },
  836 {&SUBSEP_node,  "SUBSEP",   "\034", 0,  NULL, set_SUBSEP,   true, 0 },
  837 {&TEXTDOMAIN_node,  "TEXTDOMAIN",   "messages", 0,  NULL, set_TEXTDOMAIN,   true, NON_STANDARD },
  838 {0,     NULL,       NULL,   0,  NULL, NULL, false, 0 },    /* terminator: name == NULL */
  839 };
  840 
  841 /* init_vars --- actually initialize everything in the symbol table */
  842 
  843 static void
  844 init_vars()
  845 {
  846     const struct varinit *vp;
  847     NODE *n;
  848 
  849     for (vp = varinit; vp->name != NULL; vp++) {
  850         if ((vp->flags & NO_INSTALL) != 0)
  851             continue;
  852         n = *(vp->spec) = install_symbol(estrdup(vp->name, strlen(vp->name)), Node_var);
  853         if (vp->strval != NULL)
  854             n->var_value = make_string(vp->strval, strlen(vp->strval));
  855         else
  856             n->var_value = make_number(vp->numval);
  857         n->var_assign = (Func_ptr) vp->assign;
  858         n->var_update = (Func_ptr) vp->update;
  859         if (vp->do_assign)
  860             (*(vp->assign))();
  861     }
  862 
  863     /* Load PROCINFO and ENVIRON */
  864     if (! do_traditional)
  865         load_procinfo();
  866     load_environ();
  867 }
  868 
  869 /* path_environ --- put path variable into environment if not already there */
  870 
  871 static void
  872 path_environ(const char *pname, const char *dflt)
  873 {
  874     const char *val;
  875     NODE **aptr;
  876     NODE *tmp;
  877 
  878     tmp = make_string(pname, strlen(pname));
  879     /*
  880      * On VMS, environ[] only holds a subset of what getenv() can
  881      * find, so look AWKPATH up before resorting to default path.
  882      */
  883     val = getenv(pname);
  884     if (val == NULL || *val == '\0')
  885         val = dflt;
  886     aptr = assoc_lookup(ENVIRON_node, tmp);
  887     /*
  888      * If original value was the empty string, set it to
  889      * the default value.
  890      */
  891     if ((*aptr)->stlen == 0) {
  892         unref(*aptr);
  893         *aptr = make_string(val, strlen(val));
  894     }
  895 
  896     unref(tmp);
  897 }
  898 
  899 /* load_environ --- populate the ENVIRON array */
  900 
  901 static NODE *
  902 load_environ()
  903 {
  904 #if ! (defined(VMS) && defined(__DECC))
  905     extern char **environ;
  906 #endif
  907     char *var, *val;
  908     int i;
  909     NODE *sub, *newval;
  910     static bool been_here = false;
  911 
  912     if (been_here)
  913         return ENVIRON_node;
  914 
  915     been_here = true;
  916 
  917     ENVIRON_node = install_symbol(estrdup("ENVIRON", 7), Node_var_array);
  918     for (i = 0; environ[i] != NULL; i++) {
  919         static char nullstr[] = "";
  920 
  921         var = environ[i];
  922         val = strchr(var, '=');
  923         if (val != NULL)
  924             *val++ = '\0';
  925         else
  926             val = nullstr;
  927         sub = make_string(var, strlen(var));
  928         newval = make_string(val, strlen(val));
  929         newval->flags |= USER_INPUT;
  930         assoc_set(ENVIRON_node, sub, newval);
  931 
  932         /* restore '=' so that system() gets a valid environment */
  933         if (val != nullstr)
  934             *--val = '=';
  935     }
  936     /*
  937      * Put AWKPATH and AWKLIBPATH into ENVIRON if not already there.
  938      * This allows querying it from within awk programs.
  939      *
  940      * October 2014:
  941      * If their values are "", override with the default values;
  942      * since 2.10 AWKPATH used default value if environment's
  943      * value was "".
  944      */
  945     path_environ("AWKPATH", defpath);
  946     path_environ("AWKLIBPATH", deflibpath);
  947 
  948     /* set up array functions */
  949     init_env_array(ENVIRON_node);
  950 
  951     return ENVIRON_node;
  952 }
  953 
  954 /* load_procinfo_argv --- populate PROCINFO["argv"] */
  955 
  956 static void
  957 load_procinfo_argv()
  958 {
  959     NODE *sub;
  960     NODE *val;
  961     NODE *argv_array;
  962     int i;
  963 
  964     // build the sub-array first
  965     getnode(argv_array);
  966     memset(argv_array, '\0', sizeof(NODE));  /* valgrind wants this */
  967     null_array(argv_array);
  968     argv_array->parent_array = PROCINFO_node;
  969     argv_array->vname = estrdup("argv", 4);
  970     for (i = 0; d_argv[i] != NULL; i++) {
  971         sub = make_number(i);
  972         val = make_string(d_argv[i], strlen(d_argv[i]));
  973         assoc_set(argv_array, sub, val);
  974     }
  975 
  976     // hook it into PROCINFO
  977     sub = make_string("argv", 4);
  978     assoc_set(PROCINFO_node, sub, argv_array);
  979 
  980 }
  981 
  982 /* load_procinfo --- populate the PROCINFO array */
  983 
  984 static NODE *
  985 load_procinfo()
  986 {
  987 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
  988     int i;
  989 #endif
  990 #if (defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0) || defined(HAVE_MPFR)
  991     char name[100];
  992 #endif
  993     AWKNUM value;
  994     static bool been_here = false;
  995 
  996     if (been_here)
  997         return PROCINFO_node;
  998 
  999     been_here = true;
 1000 
 1001     PROCINFO_node = install_symbol(estrdup("PROCINFO", 8), Node_var_array);
 1002 
 1003     update_PROCINFO_str("version", VERSION);
 1004     update_PROCINFO_str("strftime", def_strftime_format);
 1005     update_PROCINFO_str("platform", platform_name());
 1006 
 1007 #ifdef HAVE_MPFR
 1008     sprintf(name, "GNU MPFR %s", mpfr_get_version());
 1009     update_PROCINFO_str("mpfr_version", name);
 1010     sprintf(name, "GNU MP %s", gmp_version);
 1011     update_PROCINFO_str("gmp_version", name);
 1012     update_PROCINFO_num("prec_max", MPFR_PREC_MAX);
 1013     update_PROCINFO_num("prec_min", MPFR_PREC_MIN);
 1014 #endif
 1015 
 1016 #ifdef DYNAMIC
 1017     update_PROCINFO_num("api_major", GAWK_API_MAJOR_VERSION);
 1018     update_PROCINFO_num("api_minor", GAWK_API_MINOR_VERSION);
 1019 #endif
 1020 
 1021 #ifdef GETPGRP_VOID
 1022 #define getpgrp_arg() /* nothing */
 1023 #else
 1024 #define getpgrp_arg() getpid()
 1025 #endif
 1026 
 1027     value = getpgrp(getpgrp_arg());
 1028     update_PROCINFO_num("pgrpid", value);
 1029 
 1030     /*
 1031      * Could put a lot of this into a table, but then there's
 1032      * portability problems declaring all the functions. So just
 1033      * do it the slow and stupid way. Sigh.
 1034      */
 1035 
 1036     value = getpid();
 1037     update_PROCINFO_num("pid", value);
 1038 
 1039     value = getppid();
 1040     update_PROCINFO_num("ppid", value);
 1041 
 1042     value = getuid();
 1043     update_PROCINFO_num("uid", value);
 1044 
 1045     value = geteuid();
 1046     update_PROCINFO_num("euid", value);
 1047 
 1048     value = getgid();
 1049     update_PROCINFO_num("gid", value);
 1050 
 1051     value = getegid();
 1052     update_PROCINFO_num("egid", value);
 1053 
 1054     update_PROCINFO_str("FS", current_field_sep_str());
 1055 
 1056 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
 1057     for (i = 0; i < ngroups; i++) {
 1058         sprintf(name, "group%d", i + 1);
 1059         value = groupset[i];
 1060         update_PROCINFO_num(name, value);
 1061     }
 1062     if (groupset) {
 1063         efree(groupset);
 1064         groupset = NULL;
 1065     }
 1066 #endif
 1067     load_procinfo_argv();
 1068     return PROCINFO_node;
 1069 }
 1070 
 1071 /* is_std_var --- return true if a variable is a standard variable */
 1072 
 1073 int
 1074 is_std_var(const char *var)
 1075 {
 1076     const struct varinit *vp;
 1077 
 1078     for (vp = varinit; vp->name != NULL; vp++) {
 1079         if (strcmp(vp->name, var) == 0) {
 1080             if ((do_traditional || do_posix) && (vp->flags & NON_STANDARD) != 0)
 1081                 return false;
 1082 
 1083             return true;
 1084         }
 1085     }
 1086 
 1087     return false;
 1088 }
 1089 
 1090 /*
 1091  * is_off_limits_var --- return true if a variable is off limits
 1092  *          to extension functions
 1093  */
 1094 
 1095 int
 1096 is_off_limits_var(const char *var)
 1097 {
 1098     const struct varinit *vp;
 1099 
 1100     for (vp = varinit; vp->name != NULL; vp++) {
 1101         if (strcmp(vp->name, var) == 0)
 1102             return ((vp->flags & NOT_OFF_LIMITS) == 0);
 1103     }
 1104 
 1105     return false;
 1106 }
 1107 
 1108 /* get_spec_varname --- return the name of a special variable
 1109     with the given assign or update routine.
 1110 */
 1111 
 1112 const char *
 1113 get_spec_varname(Func_ptr fptr)
 1114 {
 1115     const struct varinit *vp;
 1116 
 1117     if (! fptr)
 1118         return NULL;
 1119     for (vp = varinit; vp->name != NULL; vp++) {
 1120         if (vp->assign == fptr || vp->update == fptr)
 1121             return vp->name;
 1122     }
 1123     return NULL;
 1124 }
 1125 
 1126 
 1127 /* arg_assign --- process a command-line assignment */
 1128 
 1129 int
 1130 arg_assign(char *arg, bool initing)
 1131 {
 1132     char *cp, *cp2;
 1133     bool badvar;
 1134     NODE *var;
 1135     NODE *it;
 1136     NODE **lhs;
 1137     long save_FNR;
 1138 
 1139     if (! initing && disallow_var_assigns)
 1140         return false;   /* --exec */
 1141 
 1142     cp = strchr(arg, '=');
 1143 
 1144     if (cp == NULL) {
 1145         if (! initing)
 1146             return false;   /* This is file name, not assignment. */
 1147 
 1148         fprintf(stderr,
 1149             _("%s: `%s' argument to `-v' not in `var=value' form\n\n"),
 1150             myname, arg);
 1151         usage(EXIT_FAILURE, stderr);
 1152     }
 1153 
 1154     *cp++ = '\0';
 1155 
 1156     /* avoid false source indications in a fatal message */
 1157     source = NULL;
 1158     sourceline = 0;
 1159     save_FNR = FNR;
 1160     FNR = 0;
 1161 
 1162     /* first check that the variable name has valid syntax */
 1163     badvar = false;
 1164     if (! is_letter((unsigned char) arg[0]))
 1165         badvar = true;
 1166     else
 1167         for (cp2 = arg+1; *cp2; cp2++)
 1168             if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
 1169                 badvar = true;
 1170                 break;
 1171             }
 1172 
 1173     if (badvar) {
 1174         if (initing)
 1175             fatal(_("`%s' is not a legal variable name"), arg);
 1176 
 1177         if (do_lint)
 1178             lintwarn(_("`%s' is not a variable name, looking for file `%s=%s'"),
 1179                 arg, arg, cp);
 1180 
 1181         goto done;
 1182     }
 1183 
 1184     // Assigning a string or typed regex
 1185 
 1186     if (! validate_qualified_name(arg)) {
 1187         badvar = true;
 1188         goto done;
 1189     }
 1190 
 1191     if (check_special(arg) >= 0)
 1192         fatal(_("cannot use gawk builtin `%s' as variable name"), arg);
 1193 
 1194     if (! initing) {
 1195         var = lookup(arg);
 1196         if (var != NULL && var->type == Node_func)
 1197             fatal(_("cannot use function `%s' as variable name"), arg);
 1198     }
 1199 
 1200     cp2 = cp + strlen(cp) - 1;  // end char
 1201     if (! do_traditional
 1202         && cp[0] == '@' && cp[1] == '/' && *cp2 == '/') {
 1203         // typed regex
 1204         size_t len = strlen(cp) - 3;
 1205 
 1206         ezalloc(cp2, char *, len + 1, "arg_assign");
 1207         memcpy(cp2, cp + 2, len);
 1208 
 1209         it = make_typed_regex(cp2, len);
 1210         // fall through to variable setup
 1211     } else {
 1212         // string assignment
 1213 
 1214         // POSIX disallows any newlines inside strings
 1215         // The scanner handles that for program files.
 1216         // We have to check here for strings passed to -v.
 1217         if (do_posix && strchr(cp, '\n') != NULL)
 1218             fatal(_("POSIX does not allow physical newlines in string values"));
 1219 
 1220         /*
 1221          * BWK awk expands escapes inside assignments.
 1222          * This makes sense, so we do it too.
 1223          * In addition, remove \-<newline> as in scanning.
 1224          */
 1225         it = make_str_node(cp, strlen(cp), SCAN | ELIDE_BACK_NL);
 1226         it->flags |= USER_INPUT;
 1227 #ifdef LC_NUMERIC
 1228         /*
 1229          * See comment above about locale decimal point.
 1230          */
 1231         if (do_posix)
 1232             setlocale(LC_NUMERIC, "C");
 1233 #endif /* LC_NUMERIC */
 1234         (void) force_number(it);
 1235 #ifdef LC_NUMERIC
 1236         if (do_posix)
 1237             setlocale(LC_NUMERIC, locale);
 1238 #endif /* LC_NUMERIC */
 1239     }
 1240 
 1241     /*
 1242      * since we are restoring the original text of ARGV later,
 1243      * need to copy the variable name part if we don't want
 1244      * name like v=abc instead of just v in var->vname
 1245      */
 1246 
 1247     cp2 = estrdup(arg, cp - arg);   /* var name */
 1248 
 1249     var = variable(0, cp2, Node_var);
 1250     if (var == NULL)    /* error */
 1251         final_exit(EXIT_FATAL);
 1252 
 1253     if (var->type == Node_var && var->var_update)
 1254         var->var_update();
 1255     lhs = get_lhs(var, false);
 1256     unref(*lhs);
 1257     *lhs = it;
 1258     /* check for set_FOO() routine */
 1259     if (var->type == Node_var && var->var_assign)
 1260         var->var_assign();
 1261 
 1262 done:
 1263     if (! initing)
 1264         *--cp = '=';    /* restore original text of ARGV */
 1265     FNR = save_FNR;
 1266     return ! badvar;
 1267 }
 1268 
 1269 /* catchsig --- catch signals */
 1270 
 1271 static void
 1272 catchsig(int sig)
 1273 {
 1274     if (sig == SIGFPE) {
 1275         fatal(_("floating point exception"));
 1276     } else if (sig == SIGSEGV
 1277 #ifdef SIGBUS
 1278             || sig == SIGBUS
 1279 #endif
 1280     ) {
 1281         if (errcount > 0)   // assume a syntax error corrupted our data structures
 1282             exit(EXIT_FATAL);
 1283 
 1284         set_loc(__FILE__, __LINE__);
 1285         msg(_("fatal error: internal error"));
 1286         /* fatal won't abort() if not compiled for debugging */
 1287         // GLIBC 2.27 doesn't necessarily flush on abort. Sigh.
 1288         fflush(NULL);
 1289         abort();
 1290     } else
 1291         cant_happen();
 1292     /* NOTREACHED */
 1293 }
 1294 
 1295 #ifdef HAVE_LIBSIGSEGV
 1296 /* catchsegv --- for use with libsigsegv */
 1297 
 1298 static int
 1299 catchsegv(void *fault_address, int serious)
 1300 {
 1301     if (errcount > 0)   // assume a syntax error corrupted our data structures
 1302         exit(EXIT_FATAL);
 1303 
 1304     set_loc(__FILE__, __LINE__);
 1305     msg(_("fatal error: internal error: segfault"));
 1306     fflush(NULL);
 1307     abort();
 1308     /*NOTREACHED*/
 1309     return 0;
 1310 }
 1311 
 1312 /* catchstackoverflow --- for use with libsigsegv */
 1313 
 1314 static void
 1315 catchstackoverflow(int emergency, stackoverflow_context_t scp)
 1316 {
 1317     set_loc(__FILE__, __LINE__);
 1318     msg(_("fatal error: internal error: stack overflow"));
 1319     fflush(NULL);
 1320     abort();
 1321     /*NOTREACHED*/
 1322     return;
 1323 }
 1324 #endif /* HAVE_LIBSIGSEGV */
 1325 
 1326 /* nostalgia --- print the famous error message and die */
 1327 
 1328 static void
 1329 nostalgia()
 1330 {
 1331     /*
 1332      * N.B.: This string is not gettextized, on purpose.
 1333      * So there.
 1334      */
 1335     fprintf(stderr, "awk: bailing out near line 1\n");
 1336     fflush(stderr);
 1337     abort();
 1338 }
 1339 
 1340 /* version --- print version message */
 1341 
 1342 static void
 1343 version()
 1344 {
 1345     printf("%s", version_string);
 1346 #ifdef DYNAMIC
 1347     printf(", API: %d.%d", GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION);
 1348 #endif
 1349 #ifdef HAVE_MPFR
 1350     printf(" (GNU MPFR %s, GNU MP %s)", mpfr_get_version(), gmp_version);
 1351 #endif
 1352     printf("\n");
 1353     print_ext_versions();
 1354 
 1355     /*
 1356      * Per GNU coding standards, print copyright info,
 1357      * then exit successfully, do nothing else.
 1358      */
 1359     copyleft();
 1360     exit(EXIT_SUCCESS);
 1361 }
 1362 
 1363 /* init_fds --- check for 0, 1, 2, open on /dev/null if possible */
 1364 
 1365 static void
 1366 init_fds()
 1367 {
 1368     struct stat sbuf;
 1369     int fd;
 1370     int newfd;
 1371     char const *const opposite_mode[] = {"w", "r", "r"};
 1372 
 1373     /* maybe no stderr, don't bother with error mesg */
 1374     for (fd = 0; fd <= 2; fd++) {
 1375         if (fstat(fd, &sbuf) < 0) {
 1376 #if MAKE_A_HEROIC_EFFORT
 1377             if (do_lint)
 1378                 lintwarn(_("no pre-opened fd %d"), fd);
 1379 #endif
 1380             newfd = devopen("/dev/null", opposite_mode[fd]);
 1381             /* turn off some compiler warnings "set but not used" */
 1382             newfd += 0;
 1383 #ifdef MAKE_A_HEROIC_EFFORT
 1384             if (do_lint && newfd < 0)
 1385                 lintwarn(_("could not pre-open /dev/null for fd %d"), fd);
 1386 #endif
 1387         }
 1388     }
 1389 }
 1390 
 1391 /* init_groupset --- initialize groupset */
 1392 
 1393 static void
 1394 init_groupset()
 1395 {
 1396 #if defined(HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
 1397 #ifdef GETGROUPS_NOT_STANDARD
 1398     /* For systems that aren't standards conformant, use old way. */
 1399     ngroups = NGROUPS_MAX;
 1400 #else
 1401     /*
 1402      * If called with 0 for both args, return value is
 1403      * total number of groups.
 1404      */
 1405     ngroups = getgroups(0, NULL);
 1406 #endif
 1407     /* If an error or no groups, just give up and get on with life. */
 1408     if (ngroups <= 0)
 1409         return;
 1410 
 1411     /* fill in groups */
 1412     emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset");
 1413 
 1414     ngroups = getgroups(ngroups, groupset);
 1415     /* same thing here, give up but keep going */
 1416     if (ngroups == -1) {
 1417         efree(groupset);
 1418         ngroups = 0;
 1419         groupset = NULL;
 1420     }
 1421 #endif
 1422 }
 1423 
 1424 /* estrdup --- duplicate a string */
 1425 
 1426 char *
 1427 estrdup(const char *str, size_t len)
 1428 {
 1429     char *s;
 1430     emalloc(s, char *, len + 1, "estrdup");
 1431     memcpy(s, str, len);
 1432     s[len] = '\0';
 1433     return s;
 1434 }
 1435 
 1436 #if defined(HAVE_LOCALE_H)
 1437 
 1438 /* init_locale --- initialize locale info. */
 1439 
 1440 /*
 1441  * On some operating systems, the pointers in the struct returned
 1442  * by localeconv() can become dangling pointers after a call to
 1443  * setlocale().  So we do a deep copy.
 1444  *
 1445  * Thanks to KIMURA Koichi <kimura.koichi@canon.co.jp>.
 1446  */
 1447 
 1448 static void
 1449 init_locale(struct lconv *l)
 1450 {
 1451     struct lconv *t;
 1452 
 1453     t = localeconv();
 1454     *l = *t;
 1455     l->thousands_sep = estrdup(t->thousands_sep, strlen(t->thousands_sep));
 1456     l->decimal_point = estrdup(t->decimal_point, strlen(t->decimal_point));
 1457     l->grouping = estrdup(t->grouping, strlen(t->grouping));
 1458     l->int_curr_symbol = estrdup(t->int_curr_symbol, strlen(t->int_curr_symbol));
 1459     l->currency_symbol = estrdup(t->currency_symbol, strlen(t->currency_symbol));
 1460     l->mon_decimal_point = estrdup(t->mon_decimal_point, strlen(t->mon_decimal_point));
 1461     l->mon_thousands_sep = estrdup(t->mon_thousands_sep, strlen(t->mon_thousands_sep));
 1462     l->mon_grouping = estrdup(t->mon_grouping, strlen(t->mon_grouping));
 1463     l->positive_sign = estrdup(t->positive_sign, strlen(t->positive_sign));
 1464     l->negative_sign = estrdup(t->negative_sign, strlen(t->negative_sign));
 1465 }
 1466 #endif /* LOCALE_H */
 1467 
 1468 /* save_argv --- save argv array */
 1469 
 1470 static void
 1471 save_argv(int argc, char **argv)
 1472 {
 1473     int i;
 1474 
 1475     emalloc(d_argv, char **, (argc + 1) * sizeof(char *), "save_argv");
 1476     for (i = 0; i < argc; i++)
 1477         d_argv[i] = estrdup(argv[i], strlen(argv[i]));
 1478     d_argv[argc] = NULL;
 1479 }
 1480 
 1481 /*
 1482  * update_global_values --- make sure the symbol table has correct values.
 1483  * Called from the grammar before dumping values.
 1484  *
 1485  * Also called when accessing through SYMTAB, and from api_sym_lookup().
 1486  */
 1487 
 1488 void
 1489 update_global_values()
 1490 {
 1491     const struct varinit *vp;
 1492 
 1493     for (vp = varinit; vp->name; vp++) {
 1494         if (vp->update != NULL)
 1495             vp->update();
 1496     }
 1497 }
 1498 
 1499 /* getenv_long --- read a long value (>= 0) from an environment var. */
 1500 
 1501 long
 1502 getenv_long(const char *name)
 1503 {
 1504     const char *val;
 1505     long newval;
 1506     if ((val = getenv(name)) != NULL && isdigit((unsigned char) *val)) {
 1507         for (newval = 0; *val && isdigit((unsigned char) *val); val++)
 1508             newval = (newval * 10) + *val - '0';
 1509         return newval;
 1510     }
 1511     return -1;
 1512 }
 1513 
 1514 /* parse_args --- do the getopt_long thing */
 1515 
 1516 static void
 1517 parse_args(int argc, char **argv)
 1518 {
 1519     /*
 1520      * The + on the front tells GNU getopt not to rearrange argv.
 1521      */
 1522     const char *optlist = "+F:f:v:W;bcCd::D::e:E:ghi:l:L::nNo::Op::MPrSstVYZ:";
 1523     int old_optind;
 1524     int c;
 1525     char *scan;
 1526     char *src;
 1527 
 1528     /* we do error messages ourselves on invalid options */
 1529     opterr = false;
 1530 
 1531     /* copy argv before getopt gets to it; used to restart the debugger */
 1532     save_argv(argc, argv);
 1533 
 1534     /* option processing. ready, set, go! */
 1535     for (optopt = 0, old_optind = 1;
 1536          (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
 1537          optopt = 0, old_optind = optind) {
 1538         if (do_posix)
 1539             opterr = true;
 1540 
 1541         switch (c) {
 1542         case 'F':
 1543             add_preassign(PRE_ASSIGN_FS, optarg);
 1544             break;
 1545 
 1546         case 'E':
 1547             disallow_var_assigns = true;
 1548             /* fall through */
 1549         case 'f':
 1550             /*
 1551              * Allow multiple -f options.
 1552              * This makes function libraries real easy.
 1553              * Most of the magic is in the scanner.
 1554              *
 1555              * The following is to allow for whitespace at the end
 1556              * of a #! /bin/gawk line in an executable file
 1557              */
 1558             scan = optarg;
 1559             if (argv[optind-1] != optarg)
 1560                 while (isspace((unsigned char) *scan))
 1561                     scan++;
 1562             src = (*scan == '\0' ? argv[optind++] : optarg);
 1563             (void) add_srcfile((src && src[0] == '-' && src[1] == '\0') ?
 1564                     SRC_STDIN : SRC_FILE,
 1565                     src, srcfiles, NULL, NULL);
 1566 
 1567             break;
 1568 
 1569         case 'v':
 1570             add_preassign(PRE_ASSIGN, optarg);
 1571             break;
 1572 
 1573         case 'b':
 1574             do_binary = true;
 1575             break;
 1576 
 1577         case 'c':
 1578             do_flags |= DO_TRADITIONAL;
 1579             break;
 1580 
 1581         case 'C':
 1582             copyleft();
 1583             break;
 1584 
 1585         case 'd':
 1586             do_flags |= DO_DUMP_VARS;
 1587             if (optarg != NULL && optarg[0] != '\0')
 1588                 varfile = optarg;
 1589             break;
 1590 
 1591         case 'D':
 1592             do_flags |= DO_DEBUG;
 1593             if (optarg != NULL && optarg[0] != '\0')
 1594                 command_file = optarg;
 1595             break;
 1596 
 1597         case 'e':
 1598             if (optarg[0] == '\0')
 1599                 warning(_("empty argument to `-e/--source' ignored"));
 1600             else
 1601                 (void) add_srcfile(SRC_CMDLINE, optarg, srcfiles, NULL, NULL);
 1602             break;
 1603 
 1604         case 'g':
 1605             do_flags |= DO_INTL;
 1606             break;
 1607 
 1608         case 'h':
 1609             /* write usage to stdout, per GNU coding stds */
 1610             usage(EXIT_SUCCESS, stdout);
 1611             break;
 1612 
 1613         case 'i':
 1614             (void) add_srcfile(SRC_INC, optarg, srcfiles, NULL, NULL);
 1615             break;
 1616 
 1617         case 'l':
 1618             (void) add_srcfile(SRC_EXTLIB, optarg, srcfiles, NULL, NULL);
 1619             break;
 1620 
 1621 #ifndef NO_LINT
 1622         case 'L':
 1623             do_flags |= (DO_LINT_ALL|DO_LINT_EXTENSIONS);
 1624             if (optarg != NULL) {
 1625                 if (strcmp(optarg, "fatal") == 0)
 1626                     lintfunc = r_fatal;
 1627                 else if (strcmp(optarg, "invalid") == 0) {
 1628                     do_flags &= ~DO_LINT_ALL;
 1629                     do_flags |= DO_LINT_INVALID;
 1630                 }
 1631                 else if (strcmp(optarg, "no-ext") == 0) {
 1632                     do_flags &= ~DO_LINT_EXTENSIONS;
 1633                 }
 1634             }
 1635             break;
 1636 
 1637         case 't':
 1638             do_flags |= DO_LINT_OLD;
 1639             break;
 1640 #else
 1641         case 'L':
 1642         case 't':
 1643             break;
 1644 #endif
 1645 
 1646         case 'n':
 1647             do_flags |= DO_NON_DEC_DATA;
 1648             break;
 1649 
 1650         case 'N':
 1651             use_lc_numeric = true;
 1652             break;
 1653 
 1654         case 'O':
 1655             do_optimize = true;
 1656             break;
 1657 
 1658         case 'p':
 1659             if (do_pretty_print)
 1660                 warning(_("`--profile' overrides `--pretty-print'"));
 1661             do_flags |= DO_PROFILE;
 1662             /* fall through */
 1663         case 'o':
 1664             if (c == 'o' && do_profile)
 1665                 warning(_("`--profile' overrides `--pretty-print'"));
 1666             do_flags |= DO_PRETTY_PRINT;
 1667             if (optarg != NULL)
 1668                 set_prof_file(optarg);
 1669             else
 1670                 set_prof_file(DEFAULT_PROFILE);
 1671             break;
 1672 
 1673         case 'M':
 1674 #ifdef HAVE_MPFR
 1675             do_flags |= DO_MPFR;
 1676 #else
 1677             warning(_("-M ignored: MPFR/GMP support not compiled in"));
 1678 #endif
 1679             break;
 1680 
 1681         case 'P':
 1682             do_flags |= DO_POSIX;
 1683             break;
 1684 
 1685         case 'r':
 1686             do_flags |= DO_INTERVALS;
 1687             break;
 1688 
 1689         case 's':
 1690             do_optimize = false;
 1691             break;
 1692 
 1693         case 'S':
 1694             do_flags |= DO_SANDBOX;
 1695             break;
 1696 
 1697         case 'V':
 1698             do_version = true;
 1699             break;
 1700 
 1701         case 'W':       /* gawk specific options - now in getopt_long */
 1702             fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"),
 1703                 argv[0], optarg);
 1704             break;
 1705 
 1706         case 0:
 1707             /*
 1708              * getopt_long found an option that sets a variable
 1709              * instead of returning a letter. Do nothing, just
 1710              * cycle around for the next one.
 1711              */
 1712             break;
 1713 
 1714         case 'Y':
 1715         case 'Z':
 1716 #if defined(YYDEBUG) || defined(GAWKDEBUG)
 1717             if (c == 'Y') {
 1718                 yydebug = 2;
 1719                 break;
 1720             }
 1721 #endif
 1722 #if defined(LOCALEDEBUG)
 1723             if (c == 'Z') {
 1724                 locale = optarg;
 1725                 break;
 1726             }
 1727 #endif
 1728             /* if not debugging, fall through */
 1729         case '?':
 1730         default:
 1731             /*
 1732              * If not posix, an unrecognized option stops argument
 1733              * processing so that it can go into ARGV for the awk
 1734              * program to see. This makes use of ``#! /bin/gawk -f''
 1735              * easier.
 1736              *
 1737              * However, it's never simple. If optopt is set,
 1738              * an option that requires an argument didn't get the
 1739              * argument. We care because if opterr is 0, then
 1740              * getopt_long won't print the error message for us.
 1741              */
 1742             if (! do_posix
 1743                 && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
 1744                 /*
 1745                  * can't just do optind--. In case of an
 1746                  * option with >= 2 letters, getopt_long
 1747                  * won't have incremented optind.
 1748                  */
 1749                 optind = old_optind;
 1750                 stopped_early = true;
 1751                 goto out;
 1752             } else if (optopt != '\0') {
 1753                 /* Use POSIX required message format */
 1754                 fprintf(stderr,
 1755                     _("%s: option requires an argument -- %c\n"),
 1756                     myname, optopt);
 1757                 usage(EXIT_FAILURE, stderr);
 1758             }
 1759             /* else
 1760                 let getopt print error message for us */
 1761             break;
 1762         }
 1763         if (c == 'E')   /* --exec ends option processing */
 1764             break;
 1765     }
 1766 out:
 1767     do_optimize = (do_optimize && ! do_pretty_print);
 1768 
 1769     return;
 1770 }
 1771 
 1772 /* set_locale_stuff --- setup the locale stuff */
 1773 
 1774 static void
 1775 set_locale_stuff(void)
 1776 {
 1777 #if defined(LC_CTYPE)
 1778     setlocale(LC_CTYPE, locale);
 1779 #endif
 1780 #if defined(LC_COLLATE)
 1781     setlocale(LC_COLLATE, locale);
 1782 #endif
 1783 #if defined(LC_MESSAGES)
 1784     setlocale(LC_MESSAGES, locale);
 1785 #endif
 1786 #if defined(LC_NUMERIC) && defined(HAVE_LOCALE_H)
 1787     /*
 1788      * Force the issue here.  According to POSIX 2001, decimal
 1789      * point is used for parsing source code and for command-line
 1790      * assignments and the locale value for processing input,
 1791      * number to string conversion, and printing output.
 1792      *
 1793      * 10/2005 --- see below also; we now only use the locale's
 1794      * decimal point if do_posix in effect.
 1795      *
 1796      * 9/2007:
 1797      * This is a mess. We need to get the locale's numeric info for
 1798      * the thousands separator for the %'d flag.
 1799      */
 1800     setlocale(LC_NUMERIC, locale);
 1801     init_locale(& loc);
 1802     setlocale(LC_NUMERIC, "C");
 1803 #endif
 1804 #if defined(LC_TIME)
 1805     setlocale(LC_TIME, locale);
 1806 #endif
 1807 
 1808     /* These must be done after calling setlocale */
 1809     (void) bindtextdomain(PACKAGE, locale_dir);
 1810     (void) textdomain(PACKAGE);
 1811 }
 1812 
 1813 /* platform_name --- return the platform name */
 1814 
 1815 static const char *
 1816 platform_name()
 1817 {
 1818     // Cygwin and Mac OS X count as POSIX
 1819 #if defined(__VMS)
 1820     return "vms";
 1821 #elif defined(__MINGW32__)
 1822     return "mingw";
 1823 #elif defined(__DJGPP__)
 1824     return "djgpp";
 1825 #elif defined(__EMX__)
 1826     return "os2";
 1827 #elif defined(USE_EBCDIC)
 1828     return "os390";
 1829 #else
 1830     return "posix";
 1831 #endif
 1832 }
 1833 
 1834 /* set_current_namespace --- set current_namespace and handle memory management */
 1835 
 1836 void
 1837 set_current_namespace(const char *new_namespace)
 1838 {
 1839     if (current_namespace != awk_namespace)
 1840         efree((void *) current_namespace);
 1841 
 1842     current_namespace = new_namespace;
 1843 }