"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/awkgram.y" (10 Apr 2020, 165605 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Bison source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. See also the latest Fossies "Diffs" side-by-side code changes report for "awkgram.y": 5.0.1_vs_5.1.0.

    1 /*
    2  * awkgram.y --- yacc/bison parser
    3  */
    4 
    5 /*
    6  * Copyright (C) 1986, 1988, 1989, 1991-2020 the Free Software Foundation, Inc.
    7  *
    8  * This file is part of GAWK, the GNU implementation of the
    9  * AWK Programming Language.
   10  *
   11  * GAWK is free software; you can redistribute it and/or modify
   12  * it under the terms of the GNU General Public License as published by
   13  * the Free Software Foundation; either version 3 of the License, or
   14  * (at your option) any later version.
   15  *
   16  * GAWK is distributed in the hope that it will be useful,
   17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19  * GNU General Public License for more details.
   20  *
   21  * You should have received a copy of the GNU General Public License
   22  * along with this program; if not, write to the Free Software
   23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
   24  */
   25 
   26 %{
   27 #ifdef GAWKDEBUG
   28 #define YYDEBUG 12
   29 #endif
   30 
   31 #include "awk.h"
   32 
   33 #if defined(__STDC__) && __STDC__ < 1   /* VMS weirdness, maybe elsewhere */
   34 #define signed /**/
   35 #endif
   36 
   37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
   38 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
   39 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
   40 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
   41 static char *get_src_buf(void);
   42 static int yylex(void);
   43 int yyparse(void);
   44 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
   45 static char **check_params(char *fname, int pcount, INSTRUCTION *list);
   46 static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist);
   47 static NODE *mk_rexp(INSTRUCTION *exp);
   48 static void param_sanity(INSTRUCTION *arglist);
   49 static int parms_shadow(INSTRUCTION *pc, bool *shadow);
   50 #ifndef NO_LINT
   51 static int isnoeffect(OPCODE type);
   52 #endif
   53 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
   54 static void dumpintlstr(const char *str, size_t len);
   55 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
   56 static bool include_source(INSTRUCTION *file, void **srcfile_p);
   57 static bool load_library(INSTRUCTION *file, void **srcfile_p);
   58 static void set_namespace(INSTRUCTION *ns, INSTRUCTION *comment);
   59 static void next_sourcefile(void);
   60 static char *tokexpand(void);
   61 static NODE *set_profile_text(NODE *n, const char *str, size_t len);
   62 static int check_qualified_special(char *token);
   63 static char *qualify_name(const char *name, size_t len);
   64 static INSTRUCTION *trailing_comment;
   65 static INSTRUCTION *outer_comment;
   66 static INSTRUCTION *interblock_comment;
   67 static INSTRUCTION *pending_comment;
   68 static INSTRUCTION *namespace_chain;
   69 
   70 #ifdef DEBUG_COMMENTS
   71 static void
   72 debug_print_comment_s(const char *name, INSTRUCTION *comment, int line)
   73 {
   74     if (comment != NULL)
   75         fprintf(stderr, "%d: %s: <%.*s>\n", line, name,
   76                 (int) (comment->memory->stlen - 1),
   77                 comment->memory->stptr);
   78 }
   79 #define debug_print_comment(comment) \
   80      debug_print_comment_s(# comment, comment, __LINE__)
   81 #endif
   82 
   83 #define instruction(t)  bcalloc(t, 1, 0)
   84 
   85 static INSTRUCTION *mk_program(void);
   86 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
   87 static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def);
   88 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
   89         INSTRUCTION *elsep, INSTRUCTION *false_branch);
   90 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
   91 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
   92         INSTRUCTION *incr, INSTRUCTION *body);
   93 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
   94 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
   95 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
   96 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
   97 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
   98 static int count_expressions(INSTRUCTION **list, bool isarg);
   99 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
  100 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
  101 
  102 enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
  103 static void func_use(const char *name, enum defref how);
  104 static void check_funcs(void);
  105 
  106 static ssize_t read_one_line(int fd, void *buffer, size_t count);
  107 static int one_line_close(int fd);
  108 static void merge_comments(INSTRUCTION *c1, INSTRUCTION *c2);
  109 static INSTRUCTION *make_braced_statements(INSTRUCTION *lbrace, INSTRUCTION *stmts, INSTRUCTION *rbrace);
  110 static void add_sign_to_num(NODE *n, char sign);
  111 
  112 static bool at_seen = false;
  113 static bool want_source = false;
  114 static bool want_regexp = false;    /* lexical scanning kludge */
  115 static enum {
  116     FUNC_HEADER,
  117     FUNC_BODY,
  118     DONT_CHECK
  119 } want_param_names = DONT_CHECK;    /* ditto */
  120 static bool in_function;        /* parsing kludge */
  121 static int rule = 0;
  122 
  123 const char *const ruletab[] = {
  124     "?",
  125     "BEGIN",
  126     "Rule",
  127     "END",
  128     "BEGINFILE",
  129     "ENDFILE",
  130 };
  131 
  132 static bool in_print = false;   /* lexical scanning kludge for print */
  133 static int in_parens = 0;   /* lexical scanning kludge for print */
  134 static int sub_counter = 0; /* array dimension counter for use in delete */
  135 static char *lexptr;        /* pointer to next char during parsing */
  136 static char *lexend;        /* end of buffer */
  137 static char *lexptr_begin;  /* keep track of where we were for error msgs */
  138 static char *lexeme;        /* beginning of lexeme for debugging */
  139 static bool lexeof;     /* seen EOF for current source? */
  140 static char *thisline = NULL;
  141 static int in_braces = 0;   /* count braces for firstline, lastline in an 'action' */
  142 static int lastline = 0;
  143 static int firstline = 0;
  144 static SRCFILE *sourcefile = NULL;  /* current program source */
  145 static int lasttok = 0;
  146 static bool eof_warned = false; /* GLOBAL: want warning for each file */
  147 static int break_allowed;   /* kludge for break */
  148 static int continue_allowed;    /* kludge for continue */
  149 
  150 #define END_FILE    -1000
  151 #define END_SRC     -2000
  152 
  153 #define YYDEBUG_LEXER_TEXT (lexeme)
  154 static char *tokstart = NULL;
  155 static char *tok = NULL;
  156 static char *tokend;
  157 int errcount = 0;
  158 
  159 extern char *source;
  160 extern int sourceline;
  161 extern SRCFILE *srcfiles;
  162 extern INSTRUCTION *rule_list;
  163 extern int max_args;
  164 extern NODE **args_array;
  165 
  166 const char awk_namespace[] = "awk";
  167 const char *current_namespace = awk_namespace;
  168 bool namespace_changed = false;
  169 
  170 static INSTRUCTION *rule_block[sizeof(ruletab)];
  171 
  172 static INSTRUCTION *ip_rec;
  173 static INSTRUCTION *ip_newfile;
  174 static INSTRUCTION *ip_atexit = NULL;
  175 static INSTRUCTION *ip_end;
  176 static INSTRUCTION *ip_endfile;
  177 static INSTRUCTION *ip_beginfile;
  178 INSTRUCTION *main_beginfile;
  179 static bool called_from_eval = false;
  180 
  181 static inline INSTRUCTION *list_create(INSTRUCTION *x);
  182 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
  183 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
  184 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
  185 
  186 extern double fmod(double x, double y);
  187 
  188 #define YYSTYPE INSTRUCTION *
  189 %}
  190 
  191 %token FUNC_CALL NAME REGEXP FILENAME
  192 %token YNUMBER YSTRING TYPED_REGEXP
  193 %token RELOP IO_OUT IO_IN
  194 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
  195 %token SUBSCRIPT
  196 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
  197 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
  198 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
  199 %token LEX_BEGINFILE LEX_ENDFILE
  200 %token LEX_GETLINE LEX_NEXTFILE
  201 %token LEX_IN
  202 %token LEX_AND LEX_OR INCREMENT DECREMENT
  203 %token LEX_BUILTIN LEX_LENGTH
  204 %token LEX_EOF
  205 %token LEX_INCLUDE LEX_EVAL LEX_LOAD LEX_NAMESPACE
  206 %token NEWLINE
  207 
  208 /* Lowest to highest */
  209 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
  210 %right '?' ':'
  211 %left LEX_OR
  212 %left LEX_AND
  213 %left LEX_GETLINE
  214 %nonassoc LEX_IN
  215 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
  216 %nonassoc ','
  217 %left MATCHOP
  218 %nonassoc RELOP '<' '>' IO_IN IO_OUT
  219 %left CONCAT_OP
  220 %left YSTRING YNUMBER TYPED_REGEXP
  221 %left '+' '-'
  222 %left '*' '/' '%'
  223 %right '!' UNARY
  224 %right '^'
  225 %left INCREMENT DECREMENT
  226 %left '$'
  227 %left '(' ')'
  228 %%
  229 
  230 program
  231     : /* empty */
  232       { $$ = NULL; }
  233     | program rule
  234       {
  235         rule = 0;
  236         yyerrok;
  237       }
  238     | program nls
  239       {
  240         if ($2 != NULL) {
  241             if ($1 == NULL)
  242                 outer_comment = $2;
  243             else
  244                 interblock_comment = $2;
  245         }
  246         $$ = $1;
  247       }
  248     | program LEX_EOF
  249       {
  250         next_sourcefile();
  251       }
  252     | program error
  253       {
  254         rule = 0;
  255         /*
  256          * If errors, give up, don't produce an infinite
  257          * stream of syntax error messages.
  258          */
  259         /* yyerrok; */
  260       }
  261     ;
  262 
  263 rule
  264     : pattern action
  265       {
  266         (void) append_rule($1, $2);
  267         if (pending_comment != NULL) {
  268             interblock_comment = pending_comment;
  269             pending_comment = NULL;
  270         }
  271       }
  272     | pattern statement_term
  273       {
  274         if (rule != Rule) {
  275             msg(_("%s blocks must have an action part"), ruletab[rule]);
  276             errcount++;
  277         } else if ($1 == NULL) {
  278             msg(_("each rule must have a pattern or an action part"));
  279             errcount++;
  280         } else {    /* pattern rule with non-empty pattern */
  281             if ($2 != NULL)
  282                 list_append($1, $2);
  283             (void) append_rule($1, NULL);
  284         }
  285       }
  286     | function_prologue action
  287       {
  288         in_function = false;
  289         (void) mk_function($1, $2);
  290         want_param_names = DONT_CHECK;
  291         if (pending_comment != NULL) {
  292             interblock_comment = pending_comment;
  293             pending_comment = NULL;
  294         }
  295         yyerrok;
  296       }
  297     | '@' LEX_INCLUDE source statement_term
  298       {
  299         want_source = false;
  300         at_seen = false;
  301         if ($3 != NULL && $4 != NULL) {
  302             SRCFILE *s = (SRCFILE *) $3;
  303             s->comment = $4;
  304         }
  305         yyerrok;
  306       }
  307     | '@' LEX_LOAD library statement_term
  308       {
  309         want_source = false;
  310         at_seen = false;
  311         if ($3 != NULL && $4 != NULL) {
  312             SRCFILE *s = (SRCFILE *) $3;
  313             s->comment = $4;
  314         }
  315         yyerrok;
  316       }
  317     | '@' LEX_NAMESPACE namespace statement_term
  318       {
  319         want_source = false;
  320         at_seen = false;
  321 
  322         // this frees $3 storage in all cases
  323         set_namespace($3, $4);
  324 
  325         yyerrok;
  326       }
  327     ;
  328 
  329 source
  330     : FILENAME
  331       {
  332         void *srcfile = NULL;
  333 
  334         if (! include_source($1, & srcfile))
  335             YYABORT;
  336         efree($1->lextok);
  337         bcfree($1);
  338         $$ = (INSTRUCTION *) srcfile;
  339       }
  340     | FILENAME error
  341       { $$ = NULL; }
  342     | error
  343       { $$ = NULL; }
  344     ;
  345 
  346 library
  347     : FILENAME
  348       {
  349         void *srcfile;
  350 
  351         if (! load_library($1, & srcfile))
  352             YYABORT;
  353         efree($1->lextok);
  354         bcfree($1);
  355         $$ = (INSTRUCTION *) srcfile;
  356       }
  357     | FILENAME error
  358       { $$ = NULL; }
  359     | error
  360       { $$ = NULL; }
  361     ;
  362 
  363 namespace
  364     : FILENAME
  365       { $$ = $1; }
  366     | FILENAME error
  367       { $$ = NULL; }
  368     | error
  369       { $$ = NULL; }
  370     ;
  371 
  372 pattern
  373     : /* empty */
  374       {
  375         rule = Rule;
  376         $$ = NULL;
  377       }
  378     | exp
  379       {
  380         rule = Rule;
  381       }
  382 
  383     | exp comma exp
  384       {
  385         INSTRUCTION *tp;
  386 
  387         add_lint($1, LINT_assign_in_cond);
  388         add_lint($3, LINT_assign_in_cond);
  389 
  390         tp = instruction(Op_no_op);
  391         list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0));
  392         $1->nexti->triggered = false;
  393         $1->nexti->target_jmp = $3->nexti;
  394 
  395         list_append($1, instruction(Op_cond_pair));
  396         $1->lasti->line_range = $1->nexti;
  397         $1->lasti->target_jmp = tp;
  398 
  399         list_append($3, instruction(Op_cond_pair));
  400         $3->lasti->line_range = $1->nexti;
  401         $3->lasti->target_jmp = tp;
  402         if (do_pretty_print) {
  403             ($1->nexti + 1)->condpair_left = $1->lasti;
  404             ($1->nexti + 1)->condpair_right = $3->lasti;
  405         }
  406         /* Put any comments in front of the range expression */
  407         if ($2 != NULL)
  408             $$ = list_append(list_merge(list_prepend($1, $2), $3), tp);
  409         else
  410             $$ = list_append(list_merge($1, $3), tp);
  411         rule = Rule;
  412       }
  413     | LEX_BEGIN
  414       {
  415         static int begin_seen = 0;
  416 
  417         if (do_lint_old && ++begin_seen == 2)
  418             lintwarn_ln($1->source_line,
  419                 _("old awk does not support multiple `BEGIN' or `END' rules"));
  420 
  421         $1->in_rule = rule = BEGIN;
  422         $1->source_file = source;
  423         $$ = $1;
  424       }
  425     | LEX_END
  426       {
  427         static int end_seen = 0;
  428 
  429         if (do_lint_old && ++end_seen == 2)
  430             lintwarn_ln($1->source_line,
  431                 _("old awk does not support multiple `BEGIN' or `END' rules"));
  432 
  433         $1->in_rule = rule = END;
  434         $1->source_file = source;
  435         $$ = $1;
  436       }
  437     | LEX_BEGINFILE
  438       {
  439         $1->in_rule = rule = BEGINFILE;
  440         $1->source_file = source;
  441         $$ = $1;
  442       }
  443     | LEX_ENDFILE
  444       {
  445         $1->in_rule = rule = ENDFILE;
  446         $1->source_file = source;
  447         $$ = $1;
  448       }
  449     ;
  450 
  451 action
  452     : l_brace statements r_brace opt_semi opt_nls
  453       {
  454         INSTRUCTION *ip = make_braced_statements($1, $2, $3);
  455 
  456         if ($3 != NULL && $5 != NULL) {
  457             merge_comments($3, $5);
  458             pending_comment = $3;
  459         } else if ($3 != NULL) {
  460             pending_comment = $3;
  461         } else if ($5 != NULL) {
  462             pending_comment = $5;
  463         }
  464 
  465         $$ = ip;
  466       }
  467     ;
  468 
  469 func_name
  470     : NAME
  471     | FUNC_CALL
  472       {
  473         const char *name = $1->lextok;
  474         char *qname = qualify_name(name, strlen(name));
  475 
  476         if (qname != name) {
  477             efree((void *)name);
  478             $1->lextok = qname;
  479         }
  480         $$ = $1;
  481       }
  482     | lex_builtin
  483       {
  484         yyerror(_("`%s' is a built-in function, it cannot be redefined"),
  485                     tokstart);
  486         YYABORT;
  487       }
  488     | '@' LEX_EVAL
  489       {
  490         $$ = $2;
  491         at_seen = false;
  492       }
  493     ;
  494 
  495 lex_builtin
  496     : LEX_BUILTIN
  497     | LEX_LENGTH
  498     ;
  499 
  500 function_prologue
  501     : LEX_FUNCTION func_name '(' { want_param_names = FUNC_HEADER; } opt_param_list r_paren opt_nls
  502       {
  503         INSTRUCTION *func_comment = NULL;
  504         // Merge any comments found in the parameter list with those
  505         // following the function header, associate the whole shebang
  506         // with the function as one block comment.
  507         if ($5 != NULL && $5->comment != NULL) {
  508             if ($7 != NULL) {
  509                 merge_comments($5->comment, $7);
  510             }
  511             func_comment = $5->comment;
  512         } else if ($7 != NULL) {
  513             func_comment = $7;
  514         }
  515 
  516         $1->source_file = source;
  517         $1->comment = func_comment;
  518         if (install_function($2->lextok, $1, $5) < 0)
  519             YYABORT;
  520         in_function = true;
  521         $2->lextok = NULL;
  522         bcfree($2);
  523         /* $5 already free'd in install_function */
  524         $$ = $1;
  525         want_param_names = FUNC_BODY;
  526       }
  527     ;
  528 
  529 regexp
  530     /*
  531      * In this rule, want_regexp tells yylex that the next thing
  532      * is a regexp so it should read up to the closing slash.
  533      */
  534     : a_slash
  535         { want_regexp = true; }
  536       REGEXP    /* The terminating '/' is consumed by yylex(). */
  537         {
  538           NODE *n, *exp;
  539           char *re;
  540           size_t len;
  541 
  542           re = $3->lextok;
  543           $3->lextok = NULL;
  544           len = strlen(re);
  545           if (do_lint) {
  546             if (len == 0)
  547                 lintwarn_ln($3->source_line,
  548                     _("regexp constant `//' looks like a C++ comment, but is not"));
  549             else if (re[0] == '*' && re[len-1] == '*')
  550                 /* possible C comment */
  551                 lintwarn_ln($3->source_line,
  552                     _("regexp constant `/%s/' looks like a C comment, but is not"), re);
  553           }
  554 
  555           exp = make_str_node(re, len, ALREADY_MALLOCED);
  556           n = make_regnode(Node_regex, exp);
  557           if (n == NULL) {
  558             unref(exp);
  559             YYABORT;
  560           }
  561           $$ = $3;
  562           $$->opcode = Op_match_rec;
  563           $$->memory = n;
  564         }
  565     ;
  566 
  567 typed_regexp
  568     : TYPED_REGEXP
  569         {
  570           char *re;
  571           size_t len;
  572 
  573           re = $1->lextok;
  574           $1->lextok = NULL;
  575           len = strlen(re);
  576 
  577           $$ = $1;
  578           $$->opcode = Op_push_re;
  579           $$->memory = make_typed_regex(re, len);
  580         }
  581 
  582 a_slash
  583     : '/'
  584       { bcfree($1); }
  585     | SLASH_BEFORE_EQUAL
  586     ;
  587 
  588 statements
  589     : /* empty */
  590       { $$ = NULL; }
  591     | statements statement
  592       {
  593         if ($2 == NULL) {
  594             $$ = $1;
  595         } else {
  596             add_lint($2, LINT_no_effect);
  597             if ($1 == NULL) {
  598                 $$ = $2;
  599             } else {
  600                 $$ = list_merge($1, $2);
  601             }
  602         }
  603 
  604         if (trailing_comment != NULL) {
  605             $$ = list_append($$, trailing_comment);
  606             trailing_comment = NULL;
  607         }
  608 
  609         yyerrok;
  610       }
  611     | statements error
  612       { $$ = NULL; }
  613     ;
  614 
  615 statement_term
  616     : nls       { $$ = $1; }
  617     | semi opt_nls  { $$ = $2; }
  618     ;
  619 
  620 statement
  621     : semi opt_nls
  622       {
  623         if ($2 != NULL) {
  624             INSTRUCTION *ip;
  625 
  626             merge_comments($2, NULL);
  627             ip = list_create(instruction(Op_no_op));
  628             $$ = list_append(ip, $2); 
  629         } else
  630             $$ = NULL;
  631       }
  632     | l_brace statements r_brace
  633       {
  634         trailing_comment = $3;  // NULL or comment
  635         $$ = make_braced_statements($1, $2, $3);
  636       }
  637     | if_statement
  638       {
  639         if (do_pretty_print)
  640             $$ = list_prepend($1, instruction(Op_exec_count));
  641         else
  642             $$ = $1;
  643       }
  644     | LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
  645       {
  646         INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
  647         INSTRUCTION *ip, *nextc, *tbreak;
  648         const char **case_values = NULL;
  649         int maxcount = 128;
  650         int case_count = 0;
  651         int i;
  652 
  653         tbreak = instruction(Op_no_op);
  654         cstmt = list_create(tbreak);
  655         cexp = list_create(instruction(Op_pop));
  656         dflt = instruction(Op_jmp);
  657         dflt->target_jmp = tbreak;  /* if no case match and no explicit default */
  658 
  659         if ($7 != NULL) {
  660             curr = $7->nexti;
  661             bcfree($7); /* Op_list */
  662         }
  663         /*  else
  664             curr = NULL; */
  665 
  666         for (; curr != NULL; curr = nextc) {
  667             INSTRUCTION *caseexp = curr->case_exp;
  668             INSTRUCTION *casestmt = curr->case_stmt;
  669 
  670             nextc = curr->nexti;
  671             if (curr->opcode == Op_K_case) {
  672                 if (caseexp->opcode == Op_push_i) {
  673                     /* a constant scalar */
  674                     char *caseval;
  675                     caseval = force_string(caseexp->memory)->stptr;
  676                     for (i = 0; i < case_count; i++) {
  677                         if (strcmp(caseval, case_values[i]) == 0)
  678                             error_ln(curr->source_line,
  679                                 _("duplicate case values in switch body: %s"), caseval);
  680                     }
  681 
  682                     if (case_values == NULL)
  683                         emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
  684                     else if (case_count >= maxcount) {
  685                         maxcount += 128;
  686                         erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
  687                     }
  688                     case_values[case_count++] = caseval;
  689                 } else {
  690                     /* match a constant regex against switch expression. */
  691                     (curr + 1)->match_exp = true;
  692                 }
  693                 curr->stmt_start = casestmt->nexti;
  694                 curr->stmt_end  = casestmt->lasti;
  695                 (void) list_prepend(cexp, curr);
  696                 (void) list_prepend(cexp, caseexp);
  697             } else {
  698                 if (dflt->target_jmp != tbreak)
  699                     error_ln(curr->source_line,
  700                         _("duplicate `default' detected in switch body"));
  701                 else
  702                     dflt->target_jmp = casestmt->nexti;
  703 
  704                 if (do_pretty_print) {
  705                     curr->stmt_start = casestmt->nexti;
  706                     curr->stmt_end = casestmt->lasti;
  707                     (void) list_prepend(cexp, curr);
  708                 } else
  709                     bcfree(curr);
  710             }
  711 
  712             cstmt = list_merge(casestmt, cstmt);
  713         }
  714 
  715         if (case_values != NULL)
  716             efree(case_values);
  717 
  718         ip = $3;
  719         if (do_pretty_print) {
  720             // first merge comments
  721             INSTRUCTION *head_comment = NULL;
  722 
  723             if ($5 != NULL && $6 != NULL) {
  724                 merge_comments($5, $6);
  725                 head_comment = $5;
  726             } else if ($5 != NULL)
  727                 head_comment = $5;
  728             else
  729                 head_comment = $6;
  730 
  731             $1->comment = head_comment;
  732 
  733             (void) list_prepend(ip, $1);
  734             (void) list_prepend(ip, instruction(Op_exec_count));
  735             $1->target_break = tbreak;
  736             ($1 + 1)->switch_start = cexp->nexti;
  737             ($1 + 1)->switch_end = cexp->lasti;
  738             ($1 + 1)->switch_end->comment = $9;
  739         }
  740         /* else
  741             $1 is NULL */
  742 
  743         (void) list_append(cexp, dflt);
  744         (void) list_merge(ip, cexp);
  745         if ($8 != NULL)
  746             (void) list_append(cstmt, $8);
  747         $$ = list_merge(ip, cstmt);
  748 
  749         break_allowed--;
  750         fix_break_continue(ip, tbreak, NULL);
  751       }
  752     | LEX_WHILE '(' exp r_paren opt_nls statement
  753       {
  754         /*
  755          *    -----------------
  756          * tc:
  757          *         cond
  758          *    -----------------
  759          *    [Op_jmp_false tb   ]
  760          *    -----------------
  761          *         body
  762          *    -----------------
  763          *    [Op_jmp      tc    ]
  764          * tb:[Op_no_op          ]
  765          */
  766 
  767         INSTRUCTION *ip, *tbreak, *tcont;
  768 
  769         tbreak = instruction(Op_no_op);
  770         add_lint($3, LINT_assign_in_cond);
  771         tcont = $3->nexti;
  772         ip = list_append($3, instruction(Op_jmp_false));
  773         ip->lasti->target_jmp = tbreak;
  774 
  775         if (do_pretty_print) {
  776             (void) list_append(ip, instruction(Op_exec_count));
  777             $1->target_break = tbreak;
  778             $1->target_continue = tcont;
  779             ($1 + 1)->while_body = ip->lasti;
  780             (void) list_prepend(ip, $1);
  781         }
  782         /* else
  783             $1 is NULL */
  784 
  785         if ($5 != NULL) {
  786             if ($6 == NULL)
  787                 $6 = list_create(instruction(Op_no_op));
  788 
  789             $5->memory->comment_type = BLOCK_COMMENT;
  790             $6 = list_prepend($6, $5);
  791         }
  792 
  793         if ($6 != NULL)
  794             (void) list_merge(ip, $6);
  795         (void) list_append(ip, instruction(Op_jmp));
  796         ip->lasti->target_jmp = tcont;
  797         $$ = list_append(ip, tbreak);
  798 
  799         break_allowed--;
  800         continue_allowed--;
  801         fix_break_continue(ip, tbreak, tcont);
  802       }
  803     | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
  804       {
  805         /*
  806          *    -----------------
  807          * z:
  808          *         body
  809          *    -----------------
  810          * tc:
  811          *         cond
  812          *    -----------------
  813          *    [Op_jmp_true | z  ]
  814          * tb:[Op_no_op         ]
  815          */
  816 
  817         INSTRUCTION *ip, *tbreak, *tcont;
  818 
  819         tbreak = instruction(Op_no_op);
  820         tcont = $6->nexti;
  821         add_lint($6, LINT_assign_in_cond);
  822         if ($3 != NULL)
  823             ip = list_merge($3, $6);
  824         else
  825             ip = list_prepend($6, instruction(Op_no_op));
  826 
  827         if ($2 != NULL)
  828             (void) list_prepend(ip, $2);
  829 
  830         if (do_pretty_print)
  831             (void) list_prepend(ip, instruction(Op_exec_count));
  832 
  833         (void) list_append(ip, instruction(Op_jmp_true));
  834         ip->lasti->target_jmp = ip->nexti;
  835         $$ = list_append(ip, tbreak);
  836 
  837         break_allowed--;
  838         continue_allowed--;
  839         fix_break_continue(ip, tbreak, tcont);
  840 
  841         if (do_pretty_print) {
  842             $1->target_break = tbreak;
  843             $1->target_continue = tcont;
  844             ($1 + 1)->doloop_cond = tcont;
  845             $$ = list_prepend(ip, $1);
  846             bcfree($4);
  847             if ($8 != NULL)
  848                 $1->comment = $8;
  849         }
  850         /* else
  851             $1 and $4 are NULLs */
  852       }
  853     | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
  854       {
  855         INSTRUCTION *ip;
  856         char *var_name = $3->lextok;
  857 
  858         if ($8 != NULL
  859                 && $8->lasti->opcode == Op_K_delete
  860                 && $8->lasti->expr_count == 1
  861                 && $8->nexti->opcode == Op_push
  862                 && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
  863                 && strcmp($8->nexti->memory->vname, var_name) == 0
  864         ) {
  865 
  866         /*
  867          * Efficiency hack.  Recognize the special case of
  868          *
  869          *  for (iggy in foo)
  870          *      delete foo[iggy]
  871          *
  872          * and treat it as if it were
  873          *
  874          *  delete foo
  875          *
  876          * Check that the body is a `delete a[i]' statement,
  877          * and that both the loop var and array names match.
  878          */
  879             NODE *arr = NULL;
  880 
  881             ip = $8->nexti->nexti;
  882             if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
  883                 arr = $5->nexti->memory;
  884             if (arr != NULL
  885                     && ip->opcode == Op_no_op
  886                     && ip->nexti->opcode == Op_push_array
  887                     && strcmp(ip->nexti->memory->vname, arr->vname) == 0
  888                     && ip->nexti->nexti == $8->lasti
  889             ) {
  890                 (void) make_assignable($8->nexti);
  891                 $8->lasti->opcode = Op_K_delete_loop;
  892                 $8->lasti->expr_count = 0;
  893                 if ($1 != NULL)
  894                     bcfree($1);
  895                 efree(var_name);
  896                 bcfree($3);
  897                 bcfree($4);
  898                 bcfree($5);
  899                 if ($7 != NULL) {
  900                     merge_comments($7, NULL);
  901                     $8 = list_prepend($8, $7);
  902                 }
  903                 $$ = $8;
  904             } else
  905                 goto regular_loop;
  906         } else {
  907             INSTRUCTION *tbreak, *tcont;
  908 
  909             /*    [ Op_push_array a       ]
  910              *    [ Op_arrayfor_init | ib ]
  911              * ic:[ Op_arrayfor_incr | ib ]
  912              *    [ Op_var_assign if any  ]
  913              *
  914              *              body
  915              *
  916              *    [Op_jmp | ic            ]
  917              * ib:[Op_arrayfor_final      ]
  918              */
  919 regular_loop:
  920             ip = $5;
  921             ip->nexti->opcode = Op_push_array;
  922 
  923             tbreak = instruction(Op_arrayfor_final);
  924             $4->opcode = Op_arrayfor_incr;
  925             $4->array_var = variable($3->source_line, var_name, Node_var);
  926             $4->target_jmp = tbreak;
  927             tcont = $4;
  928             $3->opcode = Op_arrayfor_init;
  929             $3->target_jmp = tbreak;
  930             (void) list_append(ip, $3);
  931 
  932             if (do_pretty_print) {
  933                 $1->opcode = Op_K_arrayfor;
  934                 $1->target_continue = tcont;
  935                 $1->target_break = tbreak;
  936                 (void) list_append(ip, $1);
  937             }
  938             /* else
  939                 $1 is NULL */
  940 
  941             /* add update_FOO instruction if necessary */
  942             if ($4->array_var->type == Node_var && $4->array_var->var_update) {
  943                 (void) list_append(ip, instruction(Op_var_update));
  944                 ip->lasti->update_var = $4->array_var->var_update;
  945             }
  946             (void) list_append(ip, $4);
  947 
  948             /* add set_FOO instruction if necessary */
  949             if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
  950                 (void) list_append(ip, instruction(Op_var_assign));
  951                 ip->lasti->assign_var = $4->array_var->var_assign;
  952             }
  953 
  954             if (do_pretty_print) {
  955                 (void) list_append(ip, instruction(Op_exec_count));
  956                 ($1 + 1)->forloop_cond = $4;
  957                 ($1 + 1)->forloop_body = ip->lasti;
  958             }
  959 
  960             if ($7 != NULL)
  961                 merge_comments($7, NULL);
  962 
  963             if ($8 != NULL) {
  964                 if ($7 != NULL)
  965                     $8 = list_prepend($8, $7);
  966                 (void) list_merge(ip, $8);
  967             } else if ($7 != NULL)
  968                 (void) list_append(ip, $7);
  969 
  970             (void) list_append(ip, instruction(Op_jmp));
  971             ip->lasti->target_jmp = $4;
  972             $$ = list_append(ip, tbreak);
  973             fix_break_continue(ip, tbreak, tcont);
  974         }
  975 
  976         break_allowed--;
  977         continue_allowed--;
  978       }
  979     | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
  980       {
  981         if ($5 != NULL) {
  982             merge_comments($5, NULL);
  983             $1->comment = $5;
  984         }
  985         if ($8 != NULL) {
  986             merge_comments($8, NULL);
  987             if ($1->comment == NULL) {
  988                 $8->memory->comment_type = FOR_COMMENT;
  989                 $1->comment = $8;
  990             } else
  991                 $1->comment->comment = $8;
  992         }
  993         if ($11 != NULL)
  994             $12 = list_prepend($12, $11);
  995         add_lint($6, LINT_assign_in_cond);
  996         $$ = mk_for_loop($1, $3, $6, $9, $12);
  997 
  998         break_allowed--;
  999         continue_allowed--;
 1000       }
 1001     | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
 1002       {
 1003         if ($5 != NULL) {
 1004             merge_comments($5, NULL);
 1005             $1->comment = $5;
 1006         }
 1007         if ($7 != NULL) {
 1008             merge_comments($7, NULL);
 1009             if ($1->comment == NULL) {
 1010                 $7->memory->comment_type = FOR_COMMENT;
 1011                 $1->comment = $7;
 1012             } else
 1013                 $1->comment->comment = $7;
 1014         }
 1015         if ($10 != NULL)
 1016             $11 = list_prepend($11, $10);
 1017         $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
 1018 
 1019         break_allowed--;
 1020         continue_allowed--;
 1021       }
 1022     | non_compound_stmt
 1023       {
 1024         if (do_pretty_print)
 1025             $$ = list_prepend($1, instruction(Op_exec_count));
 1026         else
 1027             $$ = $1;
 1028       }
 1029     ;
 1030 
 1031 non_compound_stmt
 1032     : LEX_BREAK statement_term
 1033       {
 1034         if (! break_allowed)
 1035             error_ln($1->source_line,
 1036                 _("`break' is not allowed outside a loop or switch"));
 1037         $1->target_jmp = NULL;
 1038         $$ = list_create($1);
 1039         if ($2 != NULL)
 1040             $$ = list_append($$, $2);
 1041       }
 1042     | LEX_CONTINUE statement_term
 1043       {
 1044         if (! continue_allowed)
 1045             error_ln($1->source_line,
 1046                 _("`continue' is not allowed outside a loop"));
 1047         $1->target_jmp = NULL;
 1048         $$ = list_create($1);
 1049         if ($2 != NULL)
 1050             $$ = list_append($$, $2);
 1051       }
 1052     | LEX_NEXT statement_term
 1053       {
 1054         /* if inside function (rule = 0), resolve context at run-time */
 1055         if (rule && rule != Rule)
 1056             error_ln($1->source_line,
 1057                 _("`next' used in %s action"), ruletab[rule]);
 1058         $1->target_jmp = ip_rec;
 1059         $$ = list_create($1);
 1060         if ($2 != NULL)
 1061             $$ = list_append($$, $2);
 1062       }
 1063     | LEX_NEXTFILE statement_term
 1064       {
 1065         /* if inside function (rule = 0), resolve context at run-time */
 1066         if (rule == BEGIN || rule == END || rule == ENDFILE)
 1067             error_ln($1->source_line,
 1068                 _("`nextfile' used in %s action"), ruletab[rule]);
 1069 
 1070         $1->target_newfile = ip_newfile;
 1071         $1->target_endfile = ip_endfile;
 1072         $$ = list_create($1);
 1073         if ($2 != NULL)
 1074             $$ = list_append($$, $2);
 1075       }
 1076     | LEX_EXIT opt_exp statement_term
 1077       {
 1078         /* Initialize the two possible jump targets, the actual target
 1079          * is resolved at run-time.
 1080          */
 1081         $1->target_end = ip_end;    /* first instruction in end_block */
 1082         $1->target_atexit = ip_atexit;  /* cleanup and go home */
 1083 
 1084         if ($2 == NULL) {
 1085             $$ = list_create($1);
 1086             (void) list_prepend($$, instruction(Op_push_i));
 1087             $$->nexti->memory = dupnode(Nnull_string);
 1088         } else
 1089             $$ = list_append($2, $1);
 1090         if ($3 != NULL)
 1091             $$ = list_append($$, $3);
 1092       }
 1093     | LEX_RETURN
 1094       {
 1095         if (! in_function)
 1096             yyerror(_("`return' used outside function context"));
 1097       } opt_exp statement_term {
 1098         if (called_from_eval)
 1099             $1->opcode = Op_K_return_from_eval;
 1100 
 1101         if ($3 == NULL) {
 1102             $$ = list_create($1);
 1103             (void) list_prepend($$, instruction(Op_push_i));
 1104             $$->nexti->memory = dupnode(Nnull_string);
 1105         } else
 1106             $$ = list_append($3, $1);
 1107         if ($4 != NULL)
 1108             $$ = list_append($$, $4);
 1109       }
 1110     | simple_stmt statement_term
 1111       {
 1112         if ($2 != NULL)
 1113             $$ = list_append($1, $2);
 1114         else
 1115             $$ = $1;
 1116       }
 1117     ;
 1118 
 1119     /*
 1120      * A simple_stmt exists to satisfy a constraint in the POSIX
 1121      * grammar allowing them to occur as the 1st and 3rd parts
 1122      * in a `for (...;...;...)' loop.  This is a historical oddity
 1123      * inherited from Unix awk, not at all documented in the AK&W
 1124      * awk book.  We support it, as this was reported as a bug.
 1125      * We don't bother to document it though. So there.
 1126      */
 1127 simple_stmt
 1128     : print { in_print = true; in_parens = 0; } print_expression_list output_redir
 1129       {
 1130         /*
 1131          * Optimization: plain `print' has no expression list, so $3 is null.
 1132          * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
 1133          * which is faster for these two cases.
 1134          */
 1135 
 1136         if (do_optimize && $1->opcode == Op_K_print &&
 1137             ($3 == NULL
 1138                 || ($3->lasti->opcode == Op_field_spec
 1139                     && $3->nexti->nexti->nexti == $3->lasti
 1140                     && $3->nexti->nexti->opcode == Op_push_i
 1141                     && $3->nexti->nexti->memory->type == Node_val)
 1142             )
 1143         ) {
 1144             static bool warned = false;
 1145             /*   -----------------
 1146              *      output_redir
 1147              *    [ redirect exp ]
 1148              *   -----------------
 1149              *     expression_list
 1150              *   ------------------
 1151              *    [Op_K_print_rec | NULL | redir_type | expr_count]
 1152              */
 1153 
 1154             if ($3 != NULL) {
 1155                 NODE *n = $3->nexti->nexti->memory;
 1156 
 1157                 if (! iszero(n))
 1158                     goto regular_print;
 1159 
 1160                 bcfree($3->lasti);          /* Op_field_spec */
 1161                 unref(n);               /* Node_val */
 1162                 bcfree($3->nexti->nexti);       /* Op_push_i */
 1163                 bcfree($3->nexti);          /* Op_list */
 1164                 bcfree($3);             /* Op_list */
 1165             } else {
 1166                 if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
 1167                     warned = true;
 1168                     lintwarn_ln($1->source_line,
 1169         _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
 1170                 }
 1171             }
 1172 
 1173             $1->expr_count = 0;
 1174             $1->opcode = Op_K_print_rec;
 1175             if ($4 == NULL) {    /* no redircetion */
 1176                 $1->redir_type = redirect_none;
 1177                 $$ = list_create($1);
 1178             } else {
 1179                 INSTRUCTION *ip;
 1180                 ip = $4->nexti;
 1181                 $1->redir_type = ip->redir_type;
 1182                 $4->nexti = ip->nexti;
 1183                 bcfree(ip);
 1184                 $$ = list_append($4, $1);
 1185             }
 1186         } else {
 1187             /*   -----------------
 1188              *    [ output_redir    ]
 1189              *    [ redirect exp    ]
 1190              *   -----------------
 1191              *    [ expression_list ]
 1192              *   ------------------
 1193              *    [$1 | NULL | redir_type | expr_count]
 1194              *
 1195              */
 1196 regular_print:
 1197             if ($4 == NULL) {       /* no redirection */
 1198                 if ($3 == NULL) {   /* print/printf without arg */
 1199                     $1->expr_count = 0;
 1200                     if ($1->opcode == Op_K_print)
 1201                         $1->opcode = Op_K_print_rec;
 1202                     $1->redir_type = redirect_none;
 1203                     $$ = list_create($1);
 1204                 } else {
 1205                     INSTRUCTION *t = $3;
 1206                     $1->expr_count = count_expressions(&t, false);
 1207                     $1->redir_type = redirect_none;
 1208                     $$ = list_append(t, $1);
 1209                 }
 1210             } else {
 1211                 INSTRUCTION *ip;
 1212                 ip = $4->nexti;
 1213                 $1->redir_type = ip->redir_type;
 1214                 $4->nexti = ip->nexti;
 1215                 bcfree(ip);
 1216                 if ($3 == NULL) {
 1217                     $1->expr_count = 0;
 1218                     if ($1->opcode == Op_K_print)
 1219                         $1->opcode = Op_K_print_rec;
 1220                     $$ = list_append($4, $1);
 1221                 } else {
 1222                     INSTRUCTION *t = $3;
 1223                     $1->expr_count = count_expressions(&t, false);
 1224                     $$ = list_append(list_merge($4, t), $1);
 1225                 }
 1226             }
 1227         }
 1228       }
 1229 
 1230     | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
 1231       {
 1232         char *arr = $2->lextok;
 1233 
 1234         $2->opcode = Op_push_array;
 1235         $2->memory = variable($2->source_line, arr, Node_var_new);
 1236 
 1237         if (! do_posix && ! do_traditional) {
 1238             if ($2->memory == symbol_table)
 1239                 fatal(_("`delete' is not allowed with SYMTAB"));
 1240             else if ($2->memory == func_table)
 1241                 fatal(_("`delete' is not allowed with FUNCTAB"));
 1242         }
 1243 
 1244         if ($4 == NULL) {
 1245             /*
 1246              * As of September 2012, POSIX has added support
 1247              * for `delete array'. See:
 1248              * http://austingroupbugs.net/view.php?id=544
 1249              *
 1250              * Thanks to Nathan Weeks for the initiative.
 1251              *
 1252              * Thus we no longer warn or check do_posix.
 1253              * Also, since BWK awk supports it, we don't have to
 1254              * check do_traditional either.
 1255              */
 1256             $1->expr_count = 0;
 1257             $$ = list_append(list_create($2), $1);
 1258         } else {
 1259             $1->expr_count = sub_counter;
 1260             $$ = list_append(list_append($4, $2), $1);
 1261         }
 1262       }
 1263     | LEX_DELETE '(' NAME ')'
 1264           /*
 1265            * this is for tawk compatibility. maybe the warnings
 1266            * should always be done.
 1267            */
 1268       {
 1269         static bool warned = false;
 1270         char *arr = $3->lextok;
 1271 
 1272         if (do_lint && ! warned) {
 1273             warned = true;
 1274             lintwarn_ln($1->source_line,
 1275                 _("`delete(array)' is a non-portable tawk extension"));
 1276         }
 1277         if (do_traditional) {
 1278             error_ln($1->source_line,
 1279                 _("`delete(array)' is a non-portable tawk extension"));
 1280         }
 1281         $3->memory = variable($3->source_line, arr, Node_var_new);
 1282         $3->opcode = Op_push_array;
 1283         $1->expr_count = 0;
 1284         $$ = list_append(list_create($3), $1);
 1285 
 1286         if (! do_posix && ! do_traditional) {
 1287             if ($3->memory == symbol_table)
 1288                 fatal(_("`delete' is not allowed with SYMTAB"));
 1289             else if ($3->memory == func_table)
 1290                 fatal(_("`delete' is not allowed with FUNCTAB"));
 1291         }
 1292       }
 1293     | exp
 1294       {
 1295         $$ = optimize_assignment($1);
 1296       }
 1297     ;
 1298 
 1299 opt_simple_stmt
 1300     : /* empty */
 1301       { $$ = NULL; }
 1302     | simple_stmt
 1303       { $$ = $1; }
 1304     ;
 1305 
 1306 case_statements
 1307     : /* empty */
 1308       { $$ = NULL; }
 1309     | case_statements case_statement
 1310       {
 1311         if ($1 == NULL)
 1312             $$ = list_create($2);
 1313         else
 1314             $$ = list_prepend($1, $2);
 1315       }
 1316     | case_statements error
 1317       { $$ = NULL; }
 1318     ;
 1319 
 1320 case_statement
 1321     : LEX_CASE case_value colon opt_nls statements
 1322       {
 1323         INSTRUCTION *casestmt = $5;
 1324         if ($5 == NULL)
 1325             casestmt = list_create(instruction(Op_no_op));
 1326         if (do_pretty_print)
 1327             (void) list_prepend(casestmt, instruction(Op_exec_count));
 1328         $1->case_exp = $2;
 1329         $1->case_stmt = casestmt;
 1330         $1->comment = $4;
 1331         bcfree($3);
 1332         $$ = $1;
 1333       }
 1334     | LEX_DEFAULT colon opt_nls statements
 1335       {
 1336         INSTRUCTION *casestmt = $4;
 1337         if ($4 == NULL)
 1338             casestmt = list_create(instruction(Op_no_op));
 1339         if (do_pretty_print)
 1340             (void) list_prepend(casestmt, instruction(Op_exec_count));
 1341         bcfree($2);
 1342         $1->case_stmt = casestmt;
 1343         $1->comment = $3;
 1344         $$ = $1;
 1345       }
 1346     ;
 1347 
 1348 case_value
 1349     : YNUMBER
 1350       { $$ = $1; }
 1351     | '-' YNUMBER    %prec UNARY
 1352       {
 1353         NODE *n = $2->memory;
 1354         (void) force_number(n);
 1355         negate_num(n);
 1356         bcfree($1);
 1357         $$ = $2;
 1358       }
 1359     | '+' YNUMBER    %prec UNARY
 1360       {
 1361         NODE *n = $2->lasti->memory;
 1362         bcfree($1);
 1363         add_sign_to_num(n, '+');
 1364         $$ = $2;
 1365       }
 1366     | YSTRING
 1367       { $$ = $1; }
 1368     | regexp
 1369       {
 1370         if ($1->memory->type == Node_regex)
 1371             $1->opcode = Op_push_re;
 1372         else
 1373             $1->opcode = Op_push;
 1374         $$ = $1;
 1375       }
 1376     | typed_regexp
 1377       {
 1378         assert(($1->memory->flags & REGEX) == REGEX);
 1379         $1->opcode = Op_push_re;
 1380         $$ = $1;
 1381       }
 1382     ;
 1383 
 1384 print
 1385     : LEX_PRINT
 1386       { $$ = $1; }
 1387     | LEX_PRINTF
 1388       { $$ = $1; }
 1389     ;
 1390 
 1391     /*
 1392      * Note: ``print(x)'' is already parsed by the first rule,
 1393      * so there is no good in covering it by the second one too.
 1394      */
 1395 print_expression_list
 1396     : opt_expression_list
 1397     | '(' expression_list r_paren
 1398       {
 1399         $$ = $2;
 1400       }
 1401     ;
 1402 
 1403 output_redir
 1404     : /* empty */
 1405       {
 1406         in_print = false;
 1407         in_parens = 0;
 1408         $$ = NULL;
 1409       }
 1410     | IO_OUT { in_print = false; in_parens = 0; } common_exp
 1411       {
 1412         if ($1->redir_type == redirect_twoway
 1413                 && $3->lasti->opcode == Op_K_getline_redir
 1414                 && $3->lasti->redir_type == redirect_twoway)
 1415             yyerror(_("multistage two-way pipelines don't work"));
 1416         if (do_lint && $1->redir_type == redirect_output && $3->lasti->opcode == Op_concat)
 1417             lintwarn(_("concatenation as I/O `>' redirection target is ambiguous"));
 1418         $$ = list_prepend($3, $1);
 1419       }
 1420     ;
 1421 
 1422 if_statement
 1423     : LEX_IF '(' exp r_paren opt_nls statement
 1424       {
 1425         if ($5 != NULL)
 1426             $1->comment = $5;
 1427         add_lint($3, LINT_assign_in_cond);
 1428         $$ = mk_condition($3, $1, $6, NULL, NULL);
 1429       }
 1430     | LEX_IF '(' exp r_paren opt_nls statement
 1431          LEX_ELSE opt_nls statement
 1432       {
 1433         if ($5 != NULL)
 1434             $1->comment = $5;
 1435         if ($8 != NULL)
 1436             $7->comment = $8;
 1437         add_lint($3, LINT_assign_in_cond);
 1438         $$ = mk_condition($3, $1, $6, $7, $9);
 1439       }
 1440     ;
 1441 
 1442 nls
 1443     : NEWLINE
 1444       {
 1445         $$ = $1;
 1446       }
 1447     | nls NEWLINE
 1448       {
 1449         if ($1 != NULL && $2 != NULL) {
 1450             if ($1->memory->comment_type == EOL_COMMENT) {
 1451                 assert($2->memory->comment_type == BLOCK_COMMENT);
 1452                 $1->comment = $2;   // chain them
 1453             } else {
 1454                 merge_comments($1, $2);
 1455             }
 1456 
 1457             $$ = $1;
 1458         } else if ($1 != NULL) {
 1459             $$ = $1;
 1460         } else if ($2 != NULL) {
 1461             $$ = $2;
 1462         } else
 1463             $$ = NULL;
 1464       }
 1465     ;
 1466 
 1467 opt_nls
 1468     : /* empty */
 1469       { $$ = NULL; }
 1470     | nls
 1471       { $$ = $1; }
 1472     ;
 1473 
 1474 input_redir
 1475     : /* empty */
 1476       { $$ = NULL; }
 1477     | '<' simp_exp
 1478       {
 1479         bcfree($1);
 1480         $$ = $2;
 1481       }
 1482     ;
 1483 
 1484 opt_param_list
 1485     : /* empty */
 1486       { $$ = NULL; }
 1487     | param_list
 1488       { $$ = $1; }
 1489     ;
 1490 
 1491 param_list
 1492     : NAME
 1493       {
 1494         $1->param_count = 0;
 1495         $$ = list_create($1);
 1496       }
 1497     | param_list comma NAME
 1498       {
 1499         if ($1 != NULL && $3 != NULL) {
 1500             $3->param_count = $1->lasti->param_count + 1;
 1501             $$ = list_append($1, $3);
 1502             yyerrok;
 1503 
 1504             // newlines are allowed after commas, catch any comments
 1505             if ($2 != NULL) {
 1506                 if ($1->comment != NULL)
 1507                     merge_comments($1->comment, $2);
 1508                 else
 1509                     $1->comment = $2;
 1510             }
 1511         } else
 1512             $$ = NULL;
 1513       }
 1514     | error
 1515       { $$ = NULL; }
 1516     | param_list error
 1517       { $$ = $1; }
 1518     | param_list comma error
 1519       { $$ = $1; }
 1520     ;
 1521 
 1522 /* optional expression, as in for loop */
 1523 opt_exp
 1524     : /* empty */
 1525       { $$ = NULL; }
 1526     | exp
 1527       { $$ = $1; }
 1528     ;
 1529 
 1530 opt_expression_list
 1531     : /* empty */
 1532       { $$ = NULL; }
 1533     | expression_list
 1534       { $$ = $1; }
 1535     ;
 1536 
 1537 expression_list
 1538     : exp
 1539       { $$ = mk_expression_list(NULL, $1); }
 1540     | expression_list comma exp
 1541       {
 1542         if ($2 != NULL)
 1543             $1->lasti->comment = $2;
 1544         $$ = mk_expression_list($1, $3);
 1545         yyerrok;
 1546       }
 1547     | error
 1548       { $$ = NULL; }
 1549     | expression_list error
 1550       {
 1551         /*
 1552          * Returning the expression list instead of NULL lets
 1553          * snode get a list of arguments that it can count.
 1554          */
 1555         $$ = $1;
 1556       }
 1557     | expression_list error exp
 1558       {
 1559         /* Ditto */
 1560         $$ = mk_expression_list($1, $3);
 1561       }
 1562     | expression_list comma error
 1563       {
 1564         /* Ditto */
 1565         if ($2 != NULL)
 1566             $1->lasti->comment = $2;
 1567         $$ = $1;
 1568       }
 1569     ;
 1570 
 1571 opt_fcall_expression_list
 1572     : /* empty */
 1573       { $$ = NULL; }
 1574     | fcall_expression_list
 1575       { $$ = $1; }
 1576     ;
 1577 
 1578 fcall_expression_list
 1579     : fcall_exp
 1580       { $$ = mk_expression_list(NULL, $1); }
 1581     | fcall_expression_list comma fcall_exp
 1582       {
 1583         if ($2 != NULL)
 1584             $1->lasti->comment = $2;
 1585         $$ = mk_expression_list($1, $3);
 1586         yyerrok;
 1587       }
 1588     | error
 1589       { $$ = NULL; }
 1590     | fcall_expression_list error
 1591       {
 1592         /*
 1593          * Returning the expression list instead of NULL lets
 1594          * snode get a list of arguments that it can count.
 1595          */
 1596         $$ = $1;
 1597       }
 1598     | fcall_expression_list error fcall_exp
 1599       {
 1600         /* Ditto */
 1601         $$ = mk_expression_list($1, $3);
 1602       }
 1603     | fcall_expression_list comma error
 1604       {
 1605         /* Ditto */
 1606         if ($2 != NULL)
 1607             $1->comment = $2;
 1608         $$ = $1;
 1609       }
 1610     ;
 1611 
 1612 fcall_exp
 1613     : exp { $$ = $1; }
 1614     | typed_regexp { $$ = list_create($1); }
 1615     ;
 1616 
 1617 /* Expressions, not including the comma operator.  */
 1618 exp
 1619     : variable assign_operator exp %prec ASSIGNOP
 1620       {
 1621         if (do_lint && $3->lasti->opcode == Op_match_rec)
 1622             lintwarn_ln($2->source_line,
 1623                 _("regular expression on right of assignment"));
 1624         $$ = mk_assignment($1, $3, $2);
 1625       }
 1626     | variable ASSIGN typed_regexp %prec ASSIGNOP
 1627       {
 1628         $$ = mk_assignment($1, list_create($3), $2);
 1629       }
 1630     | exp LEX_AND exp
 1631       { $$ = mk_boolean($1, $3, $2); }
 1632     | exp LEX_OR exp
 1633       { $$ = mk_boolean($1, $3, $2); }
 1634     | exp MATCHOP typed_regexp
 1635       {
 1636         if ($1->lasti->opcode == Op_match_rec)
 1637             warning_ln($2->source_line,
 1638                 _("regular expression on left of `~' or `!~' operator"));
 1639 
 1640         assert($3->opcode == Op_push_re
 1641             && ($3->memory->flags & REGEX) != 0);
 1642         /* RHS is @/.../ */
 1643         $2->memory = $3->memory;
 1644         bcfree($3);
 1645         $$ = list_append($1, $2);
 1646       }
 1647     | exp MATCHOP exp
 1648       {
 1649         if ($1->lasti->opcode == Op_match_rec)
 1650             warning_ln($2->source_line,
 1651                 _("regular expression on left of `~' or `!~' operator"));
 1652 
 1653         if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
 1654             /* RHS is /.../ */
 1655             $2->memory = $3->nexti->memory;
 1656             bcfree($3->nexti);  /* Op_match_rec */
 1657             bcfree($3);         /* Op_list */
 1658             $$ = list_append($1, $2);
 1659         } else {
 1660             $2->memory = make_regnode(Node_dynregex, NULL);
 1661             $$ = list_append(list_merge($1, $3), $2);
 1662         }
 1663       }
 1664     | exp LEX_IN simple_variable
 1665       {
 1666         if (do_lint_old)
 1667             lintwarn_ln($2->source_line,
 1668                 _("old awk does not support the keyword `in' except after `for'"));
 1669         $3->nexti->opcode = Op_push_array;
 1670         $2->opcode = Op_in_array;
 1671         $2->expr_count = 1;
 1672         $$ = list_append(list_merge($1, $3), $2);
 1673       }
 1674     | exp a_relop exp %prec RELOP
 1675       {
 1676         if (do_lint && $3->lasti->opcode == Op_match_rec)
 1677             lintwarn_ln($2->source_line,
 1678                 _("regular expression on right of comparison"));
 1679         $$ = list_append(list_merge($1, $3), $2);
 1680       }
 1681     | exp '?' exp ':' exp
 1682       { $$ = mk_condition($1, $2, $3, $4, $5); }
 1683     | common_exp
 1684       { $$ = $1; }
 1685     ;
 1686 
 1687 assign_operator
 1688     : ASSIGN
 1689       { $$ = $1; }
 1690     | ASSIGNOP
 1691       { $$ = $1; }
 1692     | SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
 1693       {
 1694         $2->opcode = Op_assign_quotient;
 1695         $$ = $2;
 1696       }
 1697     ;
 1698 
 1699 relop_or_less
 1700     : RELOP
 1701       { $$ = $1; }
 1702     | '<'
 1703       { $$ = $1; }
 1704     ;
 1705 
 1706 a_relop
 1707     : relop_or_less
 1708       { $$ = $1; }
 1709     | '>'
 1710       { $$ = $1; }
 1711     ;
 1712 
 1713 common_exp
 1714     : simp_exp
 1715       { $$ = $1; }
 1716     | simp_exp_nc
 1717       { $$ = $1; }
 1718     | common_exp simp_exp %prec CONCAT_OP
 1719       {
 1720         int count = 2;
 1721         bool is_simple_var = false;
 1722 
 1723         if ($1->lasti->opcode == Op_concat) {
 1724             /* multiple (> 2) adjacent strings optimization */
 1725             is_simple_var = ($1->lasti->concat_flag & CSVAR) != 0;
 1726             count = $1->lasti->expr_count + 1;
 1727             $1->lasti->opcode = Op_no_op;
 1728         } else {
 1729             is_simple_var = ($1->nexti->opcode == Op_push
 1730                     && $1->lasti == $1->nexti); /* first exp. is a simple
 1731                                                  * variable?; kludge for use
 1732                                                  * in Op_assign_concat.
 1733                                                  */
 1734         }
 1735 
 1736         if (do_optimize
 1737             && $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
 1738             && $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
 1739         ) {
 1740             NODE *n1 = $1->nexti->memory;
 1741             NODE *n2 = $2->nexti->memory;
 1742             size_t nlen;
 1743 
 1744             // 1.5 ""   # can't fold this if program mucks with CONVFMT.
 1745             // See test #12 in test/posix.awk.
 1746             // Also can't fold if one or the other is translatable.
 1747             if ((n1->flags & (NUMBER|NUMINT|INTLSTR)) != 0 || (n2->flags & (NUMBER|NUMINT|INTLSTR)) != 0)
 1748                 goto plain_concat;
 1749 
 1750             n1 = force_string(n1);
 1751             n2 = force_string(n2);
 1752             nlen = n1->stlen + n2->stlen;
 1753             erealloc(n1->stptr, char *, nlen + 1, "constant fold");
 1754             memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
 1755             n1->stlen = nlen;
 1756             n1->stptr[nlen] = '\0';
 1757             n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
 1758             n1->flags |= (STRING|STRCUR);
 1759             unref(n2);
 1760             bcfree($2->nexti);
 1761             bcfree($2);
 1762             $$ = $1;
 1763         } else {
 1764     plain_concat:
 1765             $$ = list_append(list_merge($1, $2), instruction(Op_concat));
 1766             $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
 1767             $$->lasti->expr_count = count;
 1768             if (count > max_args)
 1769                 max_args = count;
 1770         }
 1771       }
 1772     ;
 1773 
 1774 simp_exp
 1775     : non_post_simp_exp
 1776     /* Binary operators in order of decreasing precedence.  */
 1777     | simp_exp '^' simp_exp
 1778       { $$ = mk_binary($1, $3, $2); }
 1779     | simp_exp '*' simp_exp
 1780       { $$ = mk_binary($1, $3, $2); }
 1781     | simp_exp '/' simp_exp
 1782       { $$ = mk_binary($1, $3, $2); }
 1783     | simp_exp '%' simp_exp
 1784       { $$ = mk_binary($1, $3, $2); }
 1785     | simp_exp '+' simp_exp
 1786       { $$ = mk_binary($1, $3, $2); }
 1787     | simp_exp '-' simp_exp
 1788       { $$ = mk_binary($1, $3, $2); }
 1789     | LEX_GETLINE opt_variable input_redir
 1790       {
 1791         /*
 1792          * In BEGINFILE/ENDFILE, allow `getline [var] < file'
 1793          */
 1794 
 1795         if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
 1796             error_ln($1->source_line,
 1797                  _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
 1798         if (do_lint && rule == END && $3 == NULL)
 1799             lintwarn_ln($1->source_line,
 1800                 _("non-redirected `getline' undefined inside END action"));
 1801         $$ = mk_getline($1, $2, $3, redirect_input);
 1802       }
 1803     | variable INCREMENT
 1804       {
 1805         $2->opcode = Op_postincrement;
 1806         $$ = mk_assignment($1, NULL, $2);
 1807       }
 1808     | variable DECREMENT
 1809       {
 1810         $2->opcode = Op_postdecrement;
 1811         $$ = mk_assignment($1, NULL, $2);
 1812       }
 1813     | '(' expression_list r_paren LEX_IN simple_variable
 1814       {
 1815         if (do_lint_old) {
 1816             /* first one is warning so that second one comes out if warnings are fatal */
 1817             warning_ln($4->source_line,
 1818                 _("old awk does not support the keyword `in' except after `for'"));
 1819             lintwarn_ln($4->source_line,
 1820                 _("old awk does not support multidimensional arrays"));
 1821         }
 1822         $5->nexti->opcode = Op_push_array;
 1823         $4->opcode = Op_in_array;
 1824         if ($2 == NULL) {   /* error */
 1825             errcount++;
 1826             $4->expr_count = 0;
 1827             $$ = list_merge($5, $4);
 1828         } else {
 1829             INSTRUCTION *t = $2;
 1830             $4->expr_count = count_expressions(&t, false);
 1831             $$ = list_append(list_merge(t, $5), $4);
 1832         }
 1833       }
 1834     ;
 1835 
 1836 /* Expressions containing "| getline" lose the ability to be on the
 1837    right-hand side of a concatenation. */
 1838 simp_exp_nc
 1839     : common_exp IO_IN LEX_GETLINE opt_variable
 1840         {
 1841           $$ = mk_getline($3, $4, $1, $2->redir_type);
 1842           bcfree($2);
 1843         }
 1844     /* Binary operators in order of decreasing precedence.  */
 1845     | simp_exp_nc '^' simp_exp
 1846       { $$ = mk_binary($1, $3, $2); }
 1847     | simp_exp_nc '*' simp_exp
 1848       { $$ = mk_binary($1, $3, $2); }
 1849     | simp_exp_nc '/' simp_exp
 1850       { $$ = mk_binary($1, $3, $2); }
 1851     | simp_exp_nc '%' simp_exp
 1852       { $$ = mk_binary($1, $3, $2); }
 1853     | simp_exp_nc '+' simp_exp
 1854       { $$ = mk_binary($1, $3, $2); }
 1855     | simp_exp_nc '-' simp_exp
 1856       { $$ = mk_binary($1, $3, $2); }
 1857     ;
 1858 
 1859 non_post_simp_exp
 1860     : regexp
 1861       {
 1862         $$ = list_create($1);
 1863       }
 1864     | '!' simp_exp %prec UNARY
 1865       {
 1866         if ($2->opcode == Op_match_rec) {
 1867             $2->opcode = Op_nomatch;
 1868             $1->opcode = Op_push_i;
 1869             $1->memory = set_profile_text(make_number(0.0), "0", 1);
 1870             $$ = list_append(list_append(list_create($1),
 1871                         instruction(Op_field_spec)), $2);
 1872         } else {
 1873             if (do_optimize && $2->nexti == $2->lasti
 1874                     && $2->nexti->opcode == Op_push_i
 1875                     && ($2->nexti->memory->flags & (MPFN|MPZN|INTLSTR)) == 0
 1876             ) {
 1877                 NODE *n = $2->nexti->memory;
 1878                 if ((n->flags & STRING) != 0) {
 1879                     n->numbr = (AWKNUM) (n->stlen == 0);
 1880                     n->flags &= ~(STRCUR|STRING);
 1881                     n->flags |= (NUMCUR|NUMBER);
 1882                     efree(n->stptr);
 1883                     n->stptr = NULL;
 1884                     n->stlen = 0;
 1885                 } else
 1886                     n->numbr = (AWKNUM) (n->numbr == 0.0);
 1887                 bcfree($1);
 1888                 $$ = $2;
 1889             } else {
 1890                 $1->opcode = Op_not;
 1891                 add_lint($2, LINT_assign_in_cond);
 1892                 $$ = list_append($2, $1);
 1893             }
 1894         }
 1895        }
 1896     | '(' exp r_paren
 1897       {
 1898         // Always include. Allows us to lint warn on
 1899         // print "foo" > "bar" 1
 1900         // but not warn on
 1901         // print "foo" > ("bar" 1)
 1902         $$ = list_append($2, bcalloc(Op_parens, 1, sourceline));
 1903       }
 1904     | LEX_BUILTIN '(' opt_fcall_expression_list r_paren
 1905       {
 1906         $$ = snode($3, $1);
 1907         if ($$ == NULL)
 1908             YYABORT;
 1909       }
 1910     | LEX_LENGTH '(' opt_fcall_expression_list r_paren
 1911       {
 1912         $$ = snode($3, $1);
 1913         if ($$ == NULL)
 1914             YYABORT;
 1915       }
 1916     | LEX_LENGTH
 1917       {
 1918         static bool warned = false;
 1919 
 1920         if (do_lint && ! warned) {
 1921             warned = true;
 1922             lintwarn_ln($1->source_line,
 1923                 _("call of `length' without parentheses is not portable"));
 1924         }
 1925         $$ = snode(NULL, $1);
 1926         if ($$ == NULL)
 1927             YYABORT;
 1928       }
 1929     | func_call
 1930     | variable
 1931     | INCREMENT variable
 1932       {
 1933         $1->opcode = Op_preincrement;
 1934         $$ = mk_assignment($2, NULL, $1);
 1935       }
 1936     | DECREMENT variable
 1937       {
 1938         $1->opcode = Op_predecrement;
 1939         $$ = mk_assignment($2, NULL, $1);
 1940       }
 1941     | YNUMBER
 1942       {
 1943         $$ = list_create($1);
 1944       }
 1945     | YSTRING
 1946       {
 1947         $$ = list_create($1);
 1948       }
 1949     | '-' simp_exp    %prec UNARY
 1950       {
 1951         if ($2->lasti->opcode == Op_push_i
 1952             && ($2->lasti->memory->flags & STRING) == 0
 1953         ) {
 1954             NODE *n = $2->lasti->memory;
 1955             (void) force_number(n);
 1956             negate_num(n);
 1957             $$ = $2;
 1958             bcfree($1);
 1959         } else {
 1960             $1->opcode = Op_unary_minus;
 1961             $$ = list_append($2, $1);
 1962         }
 1963       }
 1964     | '+' simp_exp    %prec UNARY
 1965       {
 1966         if ($2->lasti->opcode == Op_push_i
 1967             && ($2->lasti->memory->flags & STRING) == 0
 1968             && ($2->lasti->memory->flags & NUMCONSTSTR) != 0) {
 1969             NODE *n = $2->lasti->memory;
 1970             add_sign_to_num(n, '+');
 1971             $$ = $2;
 1972             bcfree($1);
 1973         } else {
 1974             /*
 1975              * was: $$ = $2
 1976              * POSIX semantics: force a conversion to numeric type
 1977              */
 1978             $1->opcode = Op_unary_plus;
 1979             $$ = list_append($2, $1);
 1980         }
 1981       }
 1982     ;
 1983 
 1984 func_call
 1985     : direct_func_call
 1986       {
 1987         func_use($1->lasti->func_name, FUNC_USE);
 1988         $$ = $1;
 1989       }
 1990     | '@' direct_func_call
 1991       {
 1992         /* indirect function call */
 1993         INSTRUCTION *f, *t;
 1994         char *name;
 1995         NODE *indirect_var;
 1996         static bool warned = false;
 1997         const char *msg = _("indirect function calls are a gawk extension");
 1998 
 1999         if (do_traditional || do_posix)
 2000             yyerror("%s", msg);
 2001         else if (do_lint_extensions && ! warned) {
 2002             warned = true;
 2003             lintwarn("%s", msg);
 2004         }
 2005 
 2006         f = $2->lasti;
 2007         f->opcode = Op_indirect_func_call;
 2008         name = estrdup(f->func_name, strlen(f->func_name));
 2009         if (is_std_var(name))
 2010             yyerror(_("cannot use special variable `%s' for indirect function call"), name);
 2011         indirect_var = variable(f->source_line, name, Node_var_new);
 2012         t = instruction(Op_push);
 2013         t->memory = indirect_var;
 2014 
 2015         /* prepend indirect var instead of appending to arguments (opt_expression_list),
 2016          * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
 2017          *      f = "fun"
 2018          *      @f(f="real_fun")
 2019          */
 2020 
 2021         $$ = list_prepend($2, t);
 2022         at_seen = false;
 2023       }
 2024     ;
 2025 
 2026 direct_func_call
 2027     : FUNC_CALL '(' opt_fcall_expression_list r_paren
 2028       {
 2029         NODE *n;
 2030         char *name = $1->func_name;
 2031         char *qname = qualify_name(name, strlen(name));
 2032 
 2033         if (qname != name) {
 2034             efree((char *) name);
 2035             $1->func_name = qname;
 2036         }
 2037 
 2038         if (! at_seen) {
 2039             n = lookup($1->func_name);
 2040             if (n != NULL && n->type != Node_func
 2041                 && n->type != Node_ext_func) {
 2042                 error_ln($1->source_line,
 2043                     _("attempt to use non-function `%s' in function call"),
 2044                         $1->func_name);
 2045             }
 2046         }
 2047 
 2048         param_sanity($3);
 2049         $1->opcode = Op_func_call;
 2050         $1->func_body = NULL;
 2051         if ($3 == NULL) {   /* no argument or error */
 2052             ($1 + 1)->expr_count = 0;
 2053             $$ = list_create($1);
 2054         } else {
 2055             INSTRUCTION *t = $3;
 2056             ($1 + 1)->expr_count = count_expressions(&t, true);
 2057             $$ = list_append(t, $1);
 2058         }
 2059       }
 2060     ;
 2061 
 2062 opt_variable
 2063     : /* empty */
 2064       { $$ = NULL; }
 2065     | variable
 2066       { $$ = $1; }
 2067     ;
 2068 
 2069 delete_subscript_list
 2070     : /* empty */
 2071       { $$ = NULL; }
 2072     | delete_subscript SUBSCRIPT
 2073       { $$ = $1; }
 2074     ;
 2075 
 2076 delete_subscript
 2077     : delete_exp_list
 2078       { $$ = $1; }
 2079     | delete_subscript delete_exp_list
 2080       {
 2081         $$ = list_merge($1, $2);
 2082       }
 2083     ;
 2084 
 2085 delete_exp_list
 2086     : bracketed_exp_list
 2087       {
 2088         INSTRUCTION *ip = $1->lasti;
 2089         int count = ip->sub_count;  /* # of SUBSEP-seperated expressions */
 2090         if (count > 1) {
 2091             /* change Op_subscript or Op_sub_array to Op_concat */
 2092             ip->opcode = Op_concat;
 2093             ip->concat_flag = CSUBSEP;
 2094             ip->expr_count = count;
 2095         } else
 2096             ip->opcode = Op_no_op;
 2097         sub_counter++;  /* count # of dimensions */
 2098         $$ = $1;
 2099       }
 2100     ;
 2101 
 2102 bracketed_exp_list
 2103     : '[' expression_list ']'
 2104       {
 2105         INSTRUCTION *t = $2;
 2106         if ($2 == NULL) {
 2107             error_ln($3->source_line,
 2108                 _("invalid subscript expression"));
 2109             /* install Null string as subscript. */
 2110             t = list_create(instruction(Op_push_i));
 2111             t->nexti->memory = dupnode(Nnull_string);
 2112             $3->sub_count = 1;
 2113         } else
 2114             $3->sub_count = count_expressions(&t, false);
 2115         $$ = list_append(t, $3);
 2116       }
 2117     ;
 2118 
 2119 subscript
 2120     : bracketed_exp_list
 2121       { $$ = $1; }
 2122     | subscript bracketed_exp_list
 2123       {
 2124         $$ = list_merge($1, $2);
 2125       }
 2126     ;
 2127 
 2128 subscript_list
 2129     : subscript SUBSCRIPT
 2130       { $$ = $1; }
 2131     ;
 2132 
 2133 simple_variable
 2134     : NAME
 2135       {
 2136         $1->opcode = Op_push;
 2137         $1->memory = variable($1->source_line, $1->lextok, Node_var_new);
 2138         $$ = list_create($1);
 2139       }
 2140     | NAME subscript_list
 2141       {
 2142         char *arr = $1->lextok;
 2143 
 2144         $1->memory = variable($1->source_line, arr, Node_var_new);
 2145         $1->opcode = Op_push_array;
 2146         $$ = list_prepend($2, $1);
 2147       }
 2148     ;
 2149 
 2150 variable
 2151     : simple_variable
 2152       {
 2153         INSTRUCTION *ip = $1->nexti;
 2154         if (ip->opcode == Op_push
 2155             && ip->memory->type == Node_var
 2156             && ip->memory->var_update
 2157         ) {
 2158             $$ = list_prepend($1, instruction(Op_var_update));
 2159             $$->nexti->update_var = ip->memory->var_update;
 2160         } else
 2161             $$ = $1;
 2162       }
 2163     | '$' non_post_simp_exp opt_incdec
 2164       {
 2165         $$ = list_append($2, $1);
 2166         if ($3 != NULL)
 2167             mk_assignment($2, NULL, $3);
 2168       }
 2169     ;
 2170 
 2171 opt_incdec
 2172     : INCREMENT
 2173       {
 2174         $1->opcode = Op_postincrement;
 2175       }
 2176     | DECREMENT
 2177       {
 2178         $1->opcode = Op_postdecrement;
 2179       }
 2180     | /* empty */
 2181       { $$ = NULL; }
 2182     ;
 2183 
 2184 l_brace
 2185     : '{' opt_nls { $$ = $2; }
 2186     ;
 2187 
 2188 r_brace
 2189     : '}' opt_nls   { $$ = $2; yyerrok; }
 2190     ;
 2191 
 2192 r_paren
 2193     : ')' { yyerrok; }
 2194     ;
 2195 
 2196 opt_semi
 2197     : /* empty */
 2198       { $$ = NULL; }
 2199     | semi
 2200     ;
 2201 
 2202 semi
 2203     : ';'   { yyerrok; }
 2204     ;
 2205 
 2206 colon
 2207     : ':'   { $$ = $1; yyerrok; }
 2208     ;
 2209 
 2210 comma
 2211     : ',' opt_nls   { $$ = $2; yyerrok; }
 2212     ;
 2213 %%
 2214 
 2215 struct token {
 2216     const char *operator;   /* text to match */
 2217     OPCODE value;           /*  type */
 2218     int class;              /* lexical class */
 2219     unsigned flags;         /* # of args. allowed and compatability */
 2220 #   define  ARGS    0xFF    /* 0, 1, 2, 3 args allowed (any combination */
 2221 #   define  A(n)    (1<<(n))
 2222 #   define  VERSION_MASK    0xFF00  /* old awk is zero */
 2223 #   define  NOT_OLD     0x0100  /* feature not in old awk */
 2224 #   define  NOT_POSIX   0x0200  /* feature not in POSIX */
 2225 #   define  GAWKX       0x0400  /* gawk extension */
 2226 #   define  BREAK       0x0800  /* break allowed inside */
 2227 #   define  CONTINUE    0x1000  /* continue allowed inside */
 2228 #   define  DEBUG_USE   0x2000  /* for use by developers */
 2229 
 2230     NODE *(*ptr)(int);  /* function that implements this keyword */
 2231     NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
 2232 };
 2233 
 2234 #ifdef USE_EBCDIC
 2235 /* tokcompare --- lexicographically compare token names for sorting */
 2236 
 2237 static int
 2238 tokcompare(const void *l, const void *r)
 2239 {
 2240     struct token *lhs, *rhs;
 2241 
 2242     lhs = (struct token *) l;
 2243     rhs = (struct token *) r;
 2244 
 2245     return strcmp(lhs->operator, rhs->operator);
 2246 }
 2247 #endif
 2248 
 2249 /*
 2250  * Tokentab is sorted ASCII ascending order, so it can be binary searched.
 2251  * See check_special(), which sorts the table on EBCDIC systems.
 2252  * Function pointers come from declarations in awk.h.
 2253  */
 2254 
 2255 #ifdef HAVE_MPFR
 2256 #define MPF(F) do_mpfr_##F
 2257 #else
 2258 #define MPF(F) 0
 2259 #endif
 2260 
 2261 static const struct token tokentab[] = {
 2262 {"BEGIN",   Op_rule,     LEX_BEGIN, 0,      0,  0},
 2263 {"BEGINFILE",   Op_rule,     LEX_BEGINFILE, GAWKX,      0,  0},
 2264 {"END",     Op_rule,     LEX_END,   0,      0,  0},
 2265 {"ENDFILE", Op_rule,     LEX_ENDFILE,   GAWKX,      0,  0},
 2266 #ifdef ARRAYDEBUG
 2267 {"adump",   Op_builtin,    LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE,  do_adump,   0},
 2268 #endif
 2269 {"and",     Op_builtin,    LEX_BUILTIN, GAWKX,      do_and, MPF(and)},
 2270 {"asort",   Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asort,   0},
 2271 {"asorti",  Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_asorti,  0},
 2272 {"atan2",   Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(2),   do_atan2,   MPF(atan2)},
 2273 {"bindtextdomain",  Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2),    do_bindtextdomain,  0},
 2274 {"break",   Op_K_break,  LEX_BREAK, 0,      0,  0},
 2275 {"case",    Op_K_case,   LEX_CASE,  GAWKX,      0,  0},
 2276 {"close",   Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1)|A(2),  do_close,   0},
 2277 {"compl",   Op_builtin,    LEX_BUILTIN, GAWKX|A(1), do_compl,   MPF(compl)},
 2278 {"continue",    Op_K_continue, LEX_CONTINUE,    0,      0,  0},
 2279 {"cos",     Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1),   do_cos, MPF(cos)},
 2280 {"dcgettext",   Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_dcgettext,   0},
 2281 {"dcngettext",  Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext,  0},
 2282 {"default", Op_K_default,    LEX_DEFAULT,   GAWKX,      0,  0},
 2283 {"delete",  Op_K_delete,     LEX_DELETE,    NOT_OLD,    0,  0},
 2284 {"do",      Op_K_do,     LEX_DO,    NOT_OLD|BREAK|CONTINUE, 0,  0},
 2285 {"else",    Op_K_else,   LEX_ELSE,  0,      0,  0},
 2286 {"eval",    Op_symbol,   LEX_EVAL,  0,      0,  0},
 2287 {"exit",    Op_K_exit,   LEX_EXIT,  0,      0,  0},
 2288 {"exp",     Op_builtin,  LEX_BUILTIN,   A(1),       do_exp, MPF(exp)},
 2289 {"fflush",  Op_builtin,  LEX_BUILTIN,   A(0)|A(1), do_fflush,   0},
 2290 {"for",     Op_K_for,    LEX_FOR,   BREAK|CONTINUE, 0,  0},
 2291 {"func",    Op_func,    LEX_FUNCTION,   NOT_POSIX|NOT_OLD,  0,  0},
 2292 {"function",    Op_func,    LEX_FUNCTION,   NOT_OLD,    0,  0},
 2293 {"gensub",  Op_sub_builtin,  LEX_BUILTIN,   GAWKX|A(3)|A(4), 0, 0},
 2294 {"getline", Op_K_getline_redir,  LEX_GETLINE,   NOT_OLD,    0,  0},
 2295 {"gsub",    Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0,   0},
 2296 {"if",      Op_K_if,     LEX_IF,    0,      0,  0},
 2297 {"in",      Op_symbol,   LEX_IN,    0,      0,  0},
 2298 {"include", Op_symbol,   LEX_INCLUDE,   GAWKX,  0,  0},
 2299 {"index",   Op_builtin,  LEX_BUILTIN,   A(2),       do_index,   0},
 2300 {"int",     Op_builtin,  LEX_BUILTIN,   A(1),       do_int, MPF(int)},
 2301 #ifdef SUPPLY_INTDIV
 2302 {"intdiv0", Op_builtin,  LEX_BUILTIN,   GAWKX|A(3), do_intdiv,  MPF(intdiv)},
 2303 #endif
 2304 {"isarray", Op_builtin,  LEX_BUILTIN,   GAWKX|A(1), do_isarray, 0},
 2305 {"length",  Op_builtin,  LEX_LENGTH,    A(0)|A(1),  do_length,  0},
 2306 {"load",    Op_symbol,   LEX_LOAD,  GAWKX,      0,  0},
 2307 {"log",     Op_builtin,  LEX_BUILTIN,   A(1),       do_log, MPF(log)},
 2308 {"lshift",  Op_builtin,    LEX_BUILTIN, GAWKX|A(2), do_lshift,  MPF(lshift)},
 2309 {"match",   Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_match,    0},
 2310 {"mktime",  Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2), do_mktime, 0},
 2311 {"namespace",   Op_symbol,   LEX_NAMESPACE, GAWKX,      0,  0},
 2312 {"next",    Op_K_next,   LEX_NEXT,  0,      0,  0},
 2313 {"nextfile",    Op_K_nextfile, LEX_NEXTFILE,    0,      0,  0},
 2314 {"or",      Op_builtin,    LEX_BUILTIN, GAWKX,      do_or,  MPF(or)},
 2315 {"patsplit",    Op_builtin,    LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit,  0},
 2316 {"print",   Op_K_print,  LEX_PRINT, 0,      0,  0},
 2317 {"printf",  Op_K_printf,     LEX_PRINTF,    0,      0,  0},
 2318 {"rand",    Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(0),   do_rand,    MPF(rand)},
 2319 {"return",  Op_K_return,     LEX_RETURN,    NOT_OLD,    0,  0},
 2320 {"rshift",  Op_builtin,    LEX_BUILTIN, GAWKX|A(2), do_rshift,  MPF(rshift)},
 2321 {"sin",     Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1),   do_sin, MPF(sin)},
 2322 {"split",   Op_builtin,  LEX_BUILTIN,   A(2)|A(3)|A(4), do_split,   0},
 2323 {"sprintf", Op_builtin,  LEX_BUILTIN,   0,      do_sprintf, 0},
 2324 {"sqrt",    Op_builtin,  LEX_BUILTIN,   A(1),       do_sqrt,    MPF(sqrt)},
 2325 {"srand",   Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(0)|A(1), do_srand,    MPF(srand)},
 2326 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
 2327 {"stopme",  Op_builtin, LEX_BUILTIN,    GAWKX|A(0)|DEBUG_USE,   stopme,     0},
 2328 #endif
 2329 {"strftime",    Op_builtin,  LEX_BUILTIN,   GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
 2330 {"strtonum",    Op_builtin,    LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
 2331 {"sub",     Op_sub_builtin,  LEX_BUILTIN,   NOT_OLD|A(2)|A(3), 0,   0},
 2332 {"substr",  Op_builtin,  LEX_BUILTIN,   A(2)|A(3),  do_substr,  0},
 2333 {"switch",  Op_K_switch,     LEX_SWITCH,    GAWKX|BREAK,    0,  0},
 2334 {"system",  Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1),   do_system,  0},
 2335 {"systime", Op_builtin,  LEX_BUILTIN,   GAWKX|A(0), do_systime, 0},
 2336 {"tolower", Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1),   do_tolower, 0},
 2337 {"toupper", Op_builtin,  LEX_BUILTIN,   NOT_OLD|A(1),   do_toupper, 0},
 2338 {"typeof",  Op_builtin,  LEX_BUILTIN,   GAWKX|A(1)|A(2), do_typeof, 0},
 2339 {"while",   Op_K_while,  LEX_WHILE, BREAK|CONTINUE, 0,  0},
 2340 {"xor",     Op_builtin,    LEX_BUILTIN, GAWKX,      do_xor, MPF(xor)},
 2341 };
 2342 
 2343 /* Variable containing the current shift state.  */
 2344 static mbstate_t cur_mbstate;
 2345 /* Ring buffer containing current characters.  */
 2346 #define MAX_CHAR_IN_RING_BUFFER 8
 2347 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
 2348 static char cur_char_ring[RING_BUFFER_SIZE];
 2349 /* Index for ring buffers.  */
 2350 static int cur_ring_idx;
 2351 /* This macro means that last nextc() return a singlebyte character
 2352    or 1st byte of a multibyte character.  */
 2353 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
 2354 
 2355 /* getfname --- return name of a builtin function (for pretty printing) */
 2356 
 2357 const char *
 2358 getfname(NODE *(*fptr)(int), bool prepend_awk)
 2359 {
 2360     int i, j;
 2361     static char buf[100];
 2362 
 2363     j = sizeof(tokentab) / sizeof(tokentab[0]);
 2364     /* linear search, no other way to do it */
 2365     for (i = 0; i < j; i++) {
 2366         if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr) {
 2367             if (prepend_awk && (tokentab[i].flags & GAWKX) != 0) {
 2368                 sprintf(buf, "awk::%s", tokentab[i].operator);
 2369                 return buf;
 2370             }
 2371             return tokentab[i].operator;
 2372         }
 2373     }
 2374 
 2375     return NULL;
 2376 }
 2377 
 2378 /* negate_num --- negate a number in NODE */
 2379 
 2380 void
 2381 negate_num(NODE *n)
 2382 {
 2383 #ifdef HAVE_MPFR
 2384     int tval = 0;
 2385 #endif
 2386 
 2387     add_sign_to_num(n, '-');
 2388 
 2389     if (! is_mpg_number(n)) {
 2390         n->numbr = -n->numbr;
 2391         return;
 2392     }
 2393 
 2394 #ifdef HAVE_MPFR
 2395     if (is_mpg_integer(n)) {
 2396         if (! iszero(n)) {
 2397             mpz_neg(n->mpg_i, n->mpg_i);
 2398             return;
 2399         }
 2400 
 2401         /*
 2402          * 0 --> -0 conversion. Requires turning the MPG integer
 2403          * into an MPFR float.
 2404          */
 2405 
 2406         mpz_clear(n->mpg_i);    /* release the integer storage */
 2407 
 2408         /* Convert and fall through. */
 2409         tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE);
 2410         IEEE_FMT(n->mpg_numbr, tval);
 2411         n->flags &= ~MPZN;
 2412         n->flags |= MPFN;
 2413     }
 2414 
 2415     /* mpfr float case */
 2416     tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE);
 2417     IEEE_FMT(n->mpg_numbr, tval);
 2418 #endif
 2419 }
 2420 
 2421 /* add_sign_to_num --- make a constant unary plus or minus for profiling */
 2422 
 2423 static void
 2424 add_sign_to_num(NODE *n, char sign)
 2425 {
 2426     if ((n->flags & NUMCONSTSTR) != 0) {
 2427         char *s;
 2428 
 2429         s = n->stptr;
 2430         memmove(& s[1], & s[0], n->stlen + 1);
 2431         s[0] = sign;
 2432         n->stlen++;
 2433     }
 2434 }
 2435 
 2436 /* print_included_from --- print `Included from ..' file names and locations */
 2437 
 2438 static void
 2439 print_included_from()
 2440 {
 2441     int saveline, line;
 2442     SRCFILE *s;
 2443 
 2444     /* suppress current file name, line # from `.. included from ..' msgs */
 2445     saveline = sourceline;
 2446     sourceline = 0;
 2447 
 2448     for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
 2449         s = s->next;
 2450         if (s == NULL || s->fd <= INVALID_HANDLE)
 2451             continue;
 2452         line = s->srclines;
 2453 
 2454         /* if last token is NEWLINE, line number is off by 1. */
 2455         if (s->lasttok == NEWLINE)
 2456             line--;
 2457         msg("%s %s:%d%c",
 2458             s->prev == sourcefile ? "In file included from"
 2459                       : "                 from",
 2460             (s->stype == SRC_INC ||
 2461                  s->stype == SRC_FILE) ? s->src : "cmd. line",
 2462             line,
 2463             s->stype == SRC_INC ? ',' : ':'
 2464         );
 2465     }
 2466     sourceline = saveline;
 2467 }
 2468 
 2469 /* warning_ln --- print a warning message with location */
 2470 
 2471 static void
 2472 warning_ln(int line, const char *mesg, ...)
 2473 {
 2474     va_list args;
 2475     int saveline;
 2476 
 2477     saveline = sourceline;
 2478     sourceline = line;
 2479     print_included_from();
 2480     va_start(args, mesg);
 2481     err(false, _("warning: "), mesg, args);
 2482     va_end(args);
 2483     sourceline = saveline;
 2484 }
 2485 
 2486 /* lintwarn_ln --- print a lint warning and location */
 2487 
 2488 static void
 2489 lintwarn_ln(int line, const char *mesg, ...)
 2490 {
 2491     va_list args;
 2492     int saveline;
 2493 
 2494     saveline = sourceline;
 2495     sourceline = line;
 2496     print_included_from();
 2497     va_start(args, mesg);
 2498     if (lintfunc == r_fatal)
 2499         err(true, _("fatal: "), mesg, args);
 2500     else
 2501         err(false, _("warning: "), mesg, args);
 2502     va_end(args);
 2503     sourceline = saveline;
 2504     if (lintfunc == r_fatal)
 2505         gawk_exit(EXIT_FATAL);
 2506 }
 2507 
 2508 /* error_ln --- print an error message and location */
 2509 
 2510 static void
 2511 error_ln(int line, const char *m, ...)
 2512 {
 2513     va_list args;
 2514     int saveline;
 2515 
 2516     saveline = sourceline;
 2517     sourceline = line;
 2518     print_included_from();
 2519     errcount++;
 2520     va_start(args, m);
 2521     err(false, "error: ", m, args);
 2522     va_end(args);
 2523     sourceline = saveline;
 2524 }
 2525 
 2526 /* yyerror --- print a syntax error message, show where */
 2527 
 2528 static void
 2529 yyerror(const char *m, ...)
 2530 {
 2531     va_list args;
 2532     const char *mesg = NULL;
 2533     char *bp, *cp;
 2534     char *scan;
 2535     char *buf;
 2536     int count;
 2537     static char end_of_file_line[] = "(END OF FILE)";
 2538     static char syntax_error[] = "syntax error";
 2539     static size_t syn_err_len = sizeof(syntax_error) - 1;
 2540     bool generic_error = (strncmp(m, syntax_error, syn_err_len) == 0);
 2541 
 2542     print_included_from();
 2543 
 2544     errcount++;
 2545     /* Find the current line in the input file */
 2546     if (lexptr && lexeme) {
 2547         if (thisline == NULL) {
 2548             cp = lexeme;
 2549             if (*cp == '\n') {
 2550                 if (cp > lexptr_begin)
 2551                     cp--;
 2552                 mesg = _("unexpected newline or end of string");
 2553             }
 2554             for (; cp != lexptr_begin && *cp != '\n'; --cp)
 2555                 continue;
 2556             if (*cp == '\n')
 2557                 cp++;
 2558             thisline = cp;
 2559         }
 2560         /* NL isn't guaranteed */
 2561         bp = lexeme;
 2562         if (bp < thisline)
 2563             bp = thisline + 1;
 2564         while (bp < lexend && *bp && *bp != '\n')
 2565             bp++;
 2566     } else {
 2567         thisline = end_of_file_line;
 2568         bp = thisline + strlen(thisline);
 2569     }
 2570 
 2571     if (lexeof && mesg == NULL && generic_error) {
 2572         msg("%s", end_of_file_line);
 2573         mesg = _("source files / command-line arguments must contain complete functions or rules");
 2574     } else
 2575         msg("%.*s", (int) (bp - thisline), thisline);
 2576 
 2577     va_start(args, m);
 2578     if (mesg == NULL)
 2579         mesg = m;
 2580 
 2581     count = strlen(mesg) + 1;
 2582     if (lexptr != NULL)
 2583         count += (lexeme - thisline) + 2;
 2584     ezalloc(buf, char *, count+1, "yyerror");
 2585 
 2586     bp = buf;
 2587 
 2588     if (lexptr != NULL) {
 2589         scan = thisline;
 2590         while (scan < lexeme)
 2591             if (*scan++ == '\t')
 2592                 *bp++ = '\t';
 2593             else
 2594                 *bp++ = ' ';
 2595         *bp++ = '^';
 2596         *bp++ = ' ';
 2597     }
 2598     strcpy(bp, mesg);
 2599     err(false, "", buf, args);
 2600     va_end(args);
 2601     efree(buf);
 2602 }
 2603 
 2604 /* mk_program --- create a single list of instructions */
 2605 
 2606 static INSTRUCTION *
 2607 mk_program()
 2608 {
 2609     INSTRUCTION *cp, *tmp;
 2610 
 2611 #define begin_block         rule_block[BEGIN]
 2612 #define end_block           rule_block[END]
 2613 #define prog_block          rule_block[Rule]
 2614 #define beginfile_block     rule_block[BEGINFILE]
 2615 #define endfile_block       rule_block[ENDFILE]
 2616 
 2617     if (end_block == NULL)
 2618         end_block = list_create(ip_end);
 2619     else
 2620         (void) list_prepend(end_block, ip_end);
 2621 
 2622     if (! in_main_context()) {
 2623         if (begin_block != NULL && prog_block != NULL)
 2624             cp = list_merge(begin_block, prog_block);
 2625         else
 2626             cp = (begin_block != NULL) ? begin_block : prog_block;
 2627 
 2628         if (cp != NULL)
 2629             (void) list_merge(cp, end_block);
 2630         else
 2631             cp = end_block;
 2632 
 2633         (void) list_append(cp, instruction(Op_stop));
 2634         goto out;
 2635     }
 2636 
 2637     if (endfile_block == NULL)
 2638         endfile_block = list_create(ip_endfile);
 2639     else {
 2640         ip_rec->has_endfile = true;
 2641         (void) list_prepend(endfile_block, ip_endfile);
 2642     }
 2643 
 2644     if (beginfile_block == NULL)
 2645         beginfile_block = list_create(ip_beginfile);
 2646     else
 2647         (void) list_prepend(beginfile_block, ip_beginfile);
 2648 
 2649     if (prog_block == NULL) {
 2650         if (end_block->nexti == end_block->lasti
 2651                 && beginfile_block->nexti == beginfile_block->lasti
 2652                 && endfile_block->nexti == endfile_block->lasti
 2653         ) {
 2654             /* no pattern-action and (real) end, beginfile or endfile blocks */
 2655             bcfree(ip_rec);
 2656             bcfree(ip_newfile);
 2657             ip_rec = ip_newfile = NULL;
 2658 
 2659             list_append(beginfile_block, instruction(Op_after_beginfile));
 2660             (void) list_append(endfile_block, instruction(Op_after_endfile));
 2661 
 2662             if (begin_block == NULL)     /* no program at all */
 2663                 cp = end_block;
 2664             else
 2665                 cp = list_merge(begin_block, end_block);
 2666 
 2667             if (interblock_comment != NULL) {
 2668                 (void) list_append(cp, interblock_comment);
 2669                 interblock_comment = NULL;
 2670             }
 2671 
 2672             (void) list_append(cp, ip_atexit);
 2673             (void) list_append(cp, instruction(Op_stop));
 2674 
 2675             /* append beginfile_block and endfile_block for sole use
 2676              * in getline without redirection (Op_K_getline).
 2677              */
 2678 
 2679             (void) list_merge(cp, beginfile_block);
 2680             (void) list_merge(cp, endfile_block);
 2681 
 2682             if (outer_comment != NULL) {
 2683                 cp = list_merge(list_create(outer_comment), cp);
 2684                 outer_comment = NULL;
 2685             }
 2686 
 2687             if (interblock_comment != NULL) {
 2688                 (void) list_append(cp, interblock_comment);
 2689                 interblock_comment = NULL;
 2690             }
 2691 
 2692             goto out;
 2693 
 2694         } else {
 2695             /* install a do-nothing prog block */
 2696             prog_block = list_create(instruction(Op_no_op));
 2697         }
 2698     }
 2699 
 2700     (void) list_append(endfile_block, instruction(Op_after_endfile));
 2701     (void) list_prepend(prog_block, ip_rec);
 2702     (void) list_append(prog_block, instruction(Op_jmp));
 2703     prog_block->lasti->target_jmp = ip_rec;
 2704 
 2705     list_append(beginfile_block, instruction(Op_after_beginfile));
 2706 
 2707     cp = list_merge(beginfile_block, prog_block);
 2708     (void) list_prepend(cp, ip_newfile);
 2709     (void) list_merge(cp, endfile_block);
 2710     (void) list_merge(cp, end_block);
 2711     if (begin_block != NULL)
 2712         cp = list_merge(begin_block, cp);
 2713 
 2714     if (outer_comment != NULL) {
 2715         cp = list_merge(list_create(outer_comment), cp);
 2716         outer_comment = NULL;
 2717     }
 2718 
 2719     if (interblock_comment != NULL) {
 2720         (void) list_append(cp, interblock_comment);
 2721         interblock_comment = NULL;
 2722     }
 2723 
 2724     (void) list_append(cp, ip_atexit);
 2725     (void) list_append(cp, instruction(Op_stop));
 2726 
 2727 out:
 2728     /* delete the Op_list, not needed */
 2729     tmp = cp->nexti;
 2730     bcfree(cp);
 2731     return tmp;
 2732 
 2733 #undef begin_block
 2734 #undef end_block
 2735 #undef prog_block
 2736 #undef beginfile_block
 2737 #undef endfile_block
 2738 }
 2739 
 2740 /* parse_program --- read in the program and convert into a list of instructions */
 2741 
 2742 int
 2743 parse_program(INSTRUCTION **pcode, bool from_eval)
 2744 {
 2745     int ret;
 2746 
 2747     called_from_eval = from_eval;
 2748 
 2749     /* pre-create non-local jump targets
 2750      * ip_end (Op_no_op) -- used as jump target for `exit'
 2751      * outside an END block.
 2752      */
 2753     ip_end = instruction(Op_no_op);
 2754 
 2755     if (! in_main_context())
 2756         ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
 2757     else {
 2758         ip_endfile = instruction(Op_no_op);
 2759         main_beginfile = ip_beginfile = instruction(Op_no_op);
 2760         ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
 2761         ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
 2762         ip_newfile->target_jmp = ip_end;
 2763         ip_newfile->target_endfile = ip_endfile;
 2764         (ip_newfile + 1)->target_get_record = ip_rec;
 2765         ip_rec->target_newfile = ip_newfile;
 2766         ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */
 2767     }
 2768 
 2769     for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB;
 2770             sourcefile = sourcefile->next)
 2771         ;
 2772 
 2773     lexeof = false;
 2774     lexptr = NULL;
 2775     lasttok = 0;
 2776     memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *));
 2777     errcount = 0;
 2778     tok = tokstart != NULL ? tokstart : tokexpand();
 2779 
 2780     ret = yyparse();
 2781     *pcode = mk_program();
 2782 
 2783     /* avoid false source indications */
 2784     source = NULL;
 2785     sourceline = 0;
 2786     if (ret == 0)   /* avoid spurious warning if parser aborted with YYABORT */
 2787         check_funcs();
 2788 
 2789     if (do_posix && ! check_param_names())
 2790         errcount++;
 2791 
 2792     if (args_array == NULL)
 2793         emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
 2794     else
 2795         erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
 2796 
 2797     return (ret || errcount);
 2798 }
 2799 
 2800 /* free_srcfile --- free a SRCFILE struct */
 2801 
 2802 void
 2803 free_srcfile(SRCFILE *thisfile)
 2804 {
 2805     efree(thisfile->src);
 2806     efree(thisfile);
 2807 }
 2808 
 2809 /* do_add_srcfile --- add one item to srcfiles */
 2810 
 2811 static SRCFILE *
 2812 do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
 2813 {
 2814     SRCFILE *s;
 2815 
 2816     ezalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
 2817     s->src = estrdup(src, strlen(src));
 2818     s->fullpath = path;
 2819     s->stype = stype;
 2820     s->fd = INVALID_HANDLE;
 2821     s->next = thisfile;
 2822     s->prev = thisfile->prev;
 2823     thisfile->prev->next = s;
 2824     thisfile->prev = s;
 2825     return s;
 2826 }
 2827 
 2828 /* add_srcfile --- add one item to srcfiles after checking if
 2829  *              a source file exists and not already in list.
 2830  */
 2831 
 2832 SRCFILE *
 2833 add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
 2834 {
 2835     SRCFILE *s;
 2836     struct stat sbuf;
 2837     char *path;
 2838     int errno_val = 0;
 2839 
 2840     if (already_included)
 2841         *already_included = false;
 2842     if (errcode)
 2843         *errcode = 0;
 2844     if (stype == SRC_CMDLINE || stype == SRC_STDIN)
 2845         return do_add_srcfile(stype, src, NULL, thisfile);
 2846 
 2847     path = find_source(src, & sbuf, & errno_val, stype == SRC_EXTLIB);
 2848     if (path == NULL) {
 2849         if (errcode) {
 2850             *errcode = errno_val;
 2851             return NULL;
 2852         }
 2853         /* use full messages to ease translation */
 2854         fatal(stype != SRC_EXTLIB
 2855             ? _("cannot open source file `%s' for reading: %s")
 2856             : _("cannot open shared library `%s' for reading: %s"),
 2857                 src,
 2858                 errno_val ? strerror(errno_val) : _("reason unknown"));
 2859     }
 2860 
 2861     /* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
 2862     for (s = srcfiles->next; s != srcfiles; s = s->next) {
 2863         if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
 2864             if (stype == SRC_INC || stype == SRC_EXTLIB) {
 2865                 /* eliminate duplicates */
 2866                 if ((stype == SRC_INC) && (s->stype == SRC_FILE))
 2867                     fatal(_("cannot include `%s' and use it as a program file"), src);
 2868 
 2869                 if (do_lint) {
 2870                     int line = sourceline;
 2871                     /* Kludge: the line number may be off for `@include file'.
 2872                      * Since, this function is also used for '-f file' in main.c,
 2873                      * sourceline > 1 check ensures that the call is at
 2874                      * parse time.
 2875                      */
 2876                     if (sourceline > 1 && lasttok == NEWLINE)
 2877                         line--;
 2878                     lintwarn_ln(line,
 2879                             stype != SRC_EXTLIB
 2880                               ? _("already included source file `%s'")
 2881                               : _("already loaded shared library `%s'"),
 2882                             src);
 2883                 }
 2884                 efree(path);
 2885                 if (already_included)
 2886                     *already_included = true;
 2887                 return NULL;
 2888             } else {
 2889                 /* duplicates are allowed for -f */
 2890                 if (s->stype == SRC_INC)
 2891                     fatal(_("cannot include `%s' and use it as a program file"), src);
 2892                 /* no need to scan for further matches, since
 2893                  * they must be of homogeneous type */
 2894                 break;
 2895             }
 2896         }
 2897     }
 2898 
 2899     s = do_add_srcfile(stype, src, path, thisfile);
 2900     s->sbuf = sbuf;
 2901     s->mtime = sbuf.st_mtime;
 2902     return s;
 2903 }
 2904 
 2905 /* include_source --- read program from source included using `@include' */
 2906 
 2907 static bool
 2908 include_source(INSTRUCTION *file, void **srcfile_p)
 2909 {
 2910     SRCFILE *s;
 2911     char *src = file->lextok;
 2912     int errcode;
 2913     bool already_included;
 2914 
 2915     *srcfile_p = NULL;
 2916 
 2917     if (do_traditional || do_posix) {
 2918         error_ln(file->source_line, _("@include is a gawk extension"));
 2919         return false;
 2920     }
 2921 
 2922     if (strlen(src) == 0) {
 2923         if (do_lint)
 2924             lintwarn_ln(file->source_line, _("empty filename after @include"));
 2925         return true;
 2926     }
 2927 
 2928     s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
 2929     if (s == NULL) {
 2930         if (already_included)
 2931             return true;
 2932         error_ln(file->source_line,
 2933             _("cannot open source file `%s' for reading: %s"),
 2934             src, errcode ? strerror(errcode) : _("reason unknown"));
 2935         return false;
 2936     }
 2937 
 2938     /* save scanner state for the current sourcefile */
 2939     sourcefile->srclines = sourceline;
 2940     sourcefile->lexptr = lexptr;
 2941     sourcefile->lexend = lexend;
 2942     sourcefile->lexptr_begin = lexptr_begin;
 2943     sourcefile->lexeme = lexeme;
 2944     sourcefile->lasttok = lasttok;
 2945     sourcefile->namespace = current_namespace;
 2946 
 2947     /* included file becomes the current source */
 2948     sourcefile = s;
 2949     lexptr = NULL;
 2950     sourceline = 0;
 2951     source = NULL;
 2952     lasttok = 0;
 2953     lexeof = false;
 2954     eof_warned = false;
 2955     current_namespace = awk_namespace;
 2956     *srcfile_p = (void *) s;
 2957     return true;
 2958 }
 2959 
 2960 /* load_library --- load a shared library */
 2961 
 2962 static bool
 2963 load_library(INSTRUCTION *file, void **srcfile_p)
 2964 {
 2965     SRCFILE *s;
 2966     char *src = file->lextok;
 2967     int errcode;
 2968     bool already_included;
 2969 
 2970     *srcfile_p = NULL;
 2971 
 2972     if (do_traditional || do_posix) {
 2973         error_ln(file->source_line, _("@load is a gawk extension"));
 2974         return false;
 2975     }
 2976 
 2977 
 2978     if (strlen(src) == 0) {
 2979         if (do_lint)
 2980             lintwarn_ln(file->source_line, _("empty filename after @load"));
 2981         return true;
 2982     }
 2983 
 2984     if (do_pretty_print && ! do_profile) {
 2985         // create a fake one, don't try to open the file
 2986         s = do_add_srcfile(SRC_EXTLIB, src, src, sourcefile);
 2987     } else {
 2988         s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode);
 2989         if (s == NULL) {
 2990             if (already_included)
 2991                 return true;
 2992             error_ln(file->source_line,
 2993                 _("cannot open shared library `%s' for reading: %s"),
 2994                 src, errcode ? strerror(errcode) : _("reason unknown"));
 2995             return false;
 2996         }
 2997 
 2998         load_ext(s->fullpath);
 2999     }
 3000 
 3001     *srcfile_p = (void *) s;
 3002     return true;
 3003 }
 3004 
 3005 /* next_sourcefile --- read program from the next source in srcfiles */
 3006 
 3007 static void
 3008 next_sourcefile()
 3009 {
 3010     static int (*closefunc)(int fd) = NULL;
 3011 
 3012     if (closefunc == NULL) {
 3013         char *cp = getenv("AWKREADFUNC");
 3014 
 3015         /* If necessary, one day, test value for different functions.  */
 3016         if (cp == NULL)
 3017             closefunc = close;
 3018         else
 3019             closefunc = one_line_close;
 3020     }
 3021 
 3022     /*
 3023      * This won't be true if there's an invalid character in
 3024      * the source file or source string (e.g., user typo).
 3025      * Previous versions of gawk did not core dump in such a
 3026      * case.
 3027      *
 3028      * assert(lexeof == true);
 3029      */
 3030 
 3031     lexeof = false;
 3032     eof_warned = false;
 3033     sourcefile->srclines = sourceline;  /* total no of lines in current file */
 3034     if (sourcefile->fd > INVALID_HANDLE) {
 3035         if (sourcefile->fd != fileno(stdin))  /* safety */
 3036             (*closefunc)(sourcefile->fd);
 3037         sourcefile->fd = INVALID_HANDLE;
 3038     }
 3039     if (sourcefile->buf != NULL) {
 3040         efree(sourcefile->buf);
 3041         sourcefile->buf = NULL;
 3042         sourcefile->lexptr_begin = NULL;
 3043     }
 3044 
 3045     while ((sourcefile = sourcefile->next) != NULL) {
 3046         if (sourcefile == srcfiles)
 3047             return;
 3048         if (sourcefile->stype != SRC_EXTLIB)
 3049             break;
 3050     }
 3051 
 3052     if (sourcefile->lexptr_begin != NULL) {
 3053         /* resume reading from already opened file (postponed to process '@include') */
 3054         lexptr = sourcefile->lexptr;
 3055         lexend = sourcefile->lexend;
 3056         lasttok = sourcefile->lasttok;
 3057         lexptr_begin = sourcefile->lexptr_begin;
 3058         lexeme = sourcefile->lexeme;
 3059         sourceline = sourcefile->srclines;
 3060         source = sourcefile->src;
 3061         set_current_namespace(sourcefile->namespace);
 3062     } else {
 3063         lexptr = NULL;
 3064         sourceline = 0;
 3065         source = NULL;
 3066         lasttok = 0;
 3067         set_current_namespace(awk_namespace);
 3068     }
 3069 }
 3070 
 3071 /* get_src_buf --- read the next buffer of source program */
 3072 
 3073 static char *
 3074 get_src_buf()
 3075 {
 3076     int n;
 3077     char *scan;
 3078     bool newfile;
 3079     int savelen;
 3080     struct stat sbuf;
 3081 
 3082     /*
 3083      * No argument prototype on readfunc on purpose,
 3084      * avoids problems with some ancient systems where
 3085      * the types of arguments to read() aren't up to date.
 3086      */
 3087     static ssize_t (*readfunc)() = 0;
 3088 
 3089     if (readfunc == NULL) {
 3090         char *cp = getenv("AWKREADFUNC");
 3091 
 3092         /* If necessary, one day, test value for different functions.  */
 3093         if (cp == NULL)
 3094             /*
 3095              * cast is to remove warnings on systems with
 3096              * different return types for read.
 3097              */
 3098             readfunc = ( ssize_t(*)() ) read;
 3099         else
 3100             readfunc = read_one_line;
 3101     }
 3102 
 3103     newfile = false;
 3104     if (sourcefile == srcfiles)
 3105         return NULL;
 3106 
 3107     if (sourcefile->stype == SRC_CMDLINE) {
 3108         if (sourcefile->bufsize == 0) {
 3109             sourcefile->bufsize = strlen(sourcefile->src);
 3110             lexptr = lexptr_begin = lexeme = sourcefile->src;
 3111             lexend = lexptr + sourcefile->bufsize;
 3112             sourceline = 1;
 3113             if (sourcefile->bufsize == 0) {
 3114                 /*
 3115                  * Yet Another Special case:
 3116                  *  gawk '' /path/name
 3117                  * Sigh.
 3118                  */
 3119                 static bool warned = false;
 3120 
 3121                 if (do_lint && ! warned) {
 3122                     warned = true;
 3123                     lintwarn(_("empty program text on command line"));
 3124                 }
 3125                 lexeof = true;
 3126             }
 3127         } else if (sourcefile->buf == NULL  && *(lexptr-1) != '\n') {
 3128             /*
 3129              * The following goop is to ensure that the source
 3130              * ends with a newline and that the entire current
 3131              * line is available for error messages.
 3132              */
 3133             int offset;
 3134             char *buf;
 3135 
 3136             offset = lexptr - lexeme;
 3137             for (scan = lexeme; scan > lexptr_begin; scan--)
 3138                 if (*scan == '\n') {
 3139                     scan++;
 3140                     break;
 3141                 }
 3142             savelen = lexptr - scan;
 3143             emalloc(buf, char *, savelen + 1, "get_src_buf");
 3144             memcpy(buf, scan, savelen);
 3145             thisline = buf;
 3146             lexptr = buf + savelen;
 3147             *lexptr = '\n';
 3148             lexeme = lexptr - offset;
 3149             lexptr_begin = buf;
 3150             lexend = lexptr + 1;
 3151             sourcefile->buf = buf;
 3152         } else
 3153             lexeof = true;
 3154         return lexptr;
 3155     }
 3156 
 3157     if (sourcefile->fd <= INVALID_HANDLE) {
 3158         int fd;
 3159         int l;
 3160 
 3161         source = sourcefile->src;
 3162         if (source == NULL)
 3163             return NULL;
 3164         fd = srcopen(sourcefile);
 3165         if (fd <= INVALID_HANDLE) {
 3166             char *in;
 3167 
 3168             /* suppress file name and line no. in error mesg */
 3169             in = source;
 3170             source = NULL;
 3171             error(_("cannot open source file `%s' for reading: %s"),
 3172                 in, strerror(errno));
 3173             errcount++;
 3174             lexeof = true;
 3175             return sourcefile->src;
 3176         }
 3177 
 3178         sourcefile->fd = fd;
 3179         l = optimal_bufsize(fd, &sbuf);
 3180         /*
 3181          * Make sure that something silly like
 3182          *  AWKBUFSIZE=8 make check
 3183          * works ok.
 3184          */
 3185 #define A_DECENT_BUFFER_SIZE    128
 3186         if (l < A_DECENT_BUFFER_SIZE)
 3187             l = A_DECENT_BUFFER_SIZE;
 3188 #undef A_DECENT_BUFFER_SIZE
 3189         sourcefile->bufsize = l;
 3190         newfile = true;
 3191         emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
 3192         memset(sourcefile->buf, '\0', sourcefile->bufsize); // keep valgrind happy
 3193         lexptr = lexptr_begin = lexeme = sourcefile->buf;
 3194         savelen = 0;
 3195         sourceline = 1;
 3196         thisline = NULL;
 3197     } else {
 3198         /*
 3199          * Here, we retain the current source line in the beginning of the buffer.
 3200          */
 3201         int offset;
 3202         for (scan = lexeme; scan > lexptr_begin; scan--)
 3203             if (*scan == '\n') {
 3204                 scan++;
 3205                 break;
 3206             }
 3207 
 3208         savelen = lexptr - scan;
 3209         offset = lexptr - lexeme;
 3210 
 3211         if (savelen > 0) {
 3212             /*
 3213              * Need to make sure we have room left for reading new text;
 3214              * grow the buffer (by doubling, an arbitrary choice), if the retained line
 3215              * takes up more than a certain percentage (50%, again an arbitrary figure)
 3216              * of the available space.
 3217              */
 3218 
 3219             if (savelen > sourcefile->bufsize / 2) { /* long line or token  */
 3220                 sourcefile->bufsize *= 2;
 3221                 erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
 3222                 scan = sourcefile->buf + (scan - lexptr_begin);
 3223                 lexptr_begin = sourcefile->buf;
 3224             }
 3225 
 3226             thisline = lexptr_begin;
 3227             memmove(thisline, scan, savelen);
 3228             lexptr = thisline + savelen;
 3229             lexeme = lexptr - offset;
 3230         } else {
 3231             savelen = 0;
 3232             lexptr = lexeme = lexptr_begin;
 3233             thisline = NULL;
 3234         }
 3235     }
 3236 
 3237     n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
 3238     if (n == -1) {
 3239         error(_("cannot read source file `%s': %s"),
 3240                 source, strerror(errno));
 3241         errcount++;
 3242         lexeof = true;
 3243     } else {
 3244         lexend = lexptr + n;
 3245         if (n == 0) {
 3246             static bool warned = false;
 3247             if (do_lint && newfile && ! warned) {
 3248                 warned = true;
 3249                 sourceline = 0;
 3250                 lintwarn(_("source file `%s' is empty"), source);
 3251             }
 3252             lexeof = true;
 3253         }
 3254     }
 3255     return sourcefile->buf;
 3256 }
 3257 
 3258 /* tokadd --- add a character to the token buffer */
 3259 
 3260 #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
 3261 
 3262 /* tokexpand --- grow the token buffer */
 3263 
 3264 static char *
 3265 tokexpand()
 3266 {
 3267     static int toksize;
 3268     int tokoffset;
 3269 
 3270     if (tokstart != NULL) {
 3271         tokoffset = tok - tokstart;
 3272         toksize *= 2;
 3273         erealloc(tokstart, char *, toksize, "tokexpand");
 3274         tok = tokstart + tokoffset;
 3275     } else {
 3276         toksize = 60;
 3277         emalloc(tokstart, char *, toksize, "tokexpand");
 3278         tok = tokstart;
 3279     }
 3280     tokend = tokstart + toksize;
 3281     return tok;
 3282 }
 3283 
 3284 /* check_bad_char --- fatal if c isn't allowed in gawk source code */
 3285 
 3286 /*
 3287  * The error message was inspired by someone who decided to put
 3288  * a physical \0 byte into the source code to see what would
 3289  * happen and then filed a bug report about it.  Sigh.
 3290  */
 3291 
 3292 static void
 3293 check_bad_char(int c)
 3294 {
 3295     /* allow escapes. needed for autoconf. bleah. */
 3296     switch (c) {
 3297     case '\a':
 3298     case '\b':
 3299     case '\f':
 3300     case '\n':
 3301     case '\r':
 3302     case '\t':
 3303         return;
 3304     default:
 3305         break;
 3306     }
 3307 
 3308     if (iscntrl(c) && ! isspace(c))
 3309         // This is a PEBKAC error, but we'll be nice and not say so.
 3310         fatal(_("error: invalid character '\\%03o' in source code"), c & 0xFF);
 3311 }
 3312 
 3313 /* nextc --- get the next input character */
 3314 
 3315 // For namespaces, -e chunks must be syntactic units.
 3316 #define NO_CONTINUE_SOURCE_STRINGS  1
 3317 
 3318 static int
 3319 nextc(bool check_for_bad)
 3320 {
 3321     if (gawk_mb_cur_max > 1) {
 3322 again:
 3323 #ifdef NO_CONTINUE_SOURCE_STRINGS
 3324         if (lexeof)
 3325             return END_FILE;
 3326 #else
 3327         if (lexeof) {
 3328             if (sourcefile->next == srcfiles)
 3329                 return END_FILE;
 3330             else
 3331                 next_sourcefile();
 3332         }
 3333 #endif
 3334         if (lexptr == NULL || lexptr >= lexend) {
 3335             if (get_src_buf())
 3336                 goto again;
 3337             return END_SRC;
 3338         }
 3339 
 3340         /* Update the buffer index.  */
 3341         cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
 3342             cur_ring_idx + 1;
 3343 
 3344         /* Did we already check the current character?  */
 3345         if (cur_char_ring[cur_ring_idx] == 0) {
 3346             /* No, we need to check the next character on the buffer.  */
 3347             int idx, work_ring_idx = cur_ring_idx;
 3348             mbstate_t tmp_state;
 3349             size_t mbclen;
 3350 
 3351             for (idx = 0; lexptr + idx < lexend; idx++) {
 3352                 tmp_state = cur_mbstate;
 3353                 mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
 3354 
 3355                 if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
 3356                     /* It is a singlebyte character, non-complete multibyte
 3357                        character or EOF.  We treat it as a singlebyte
 3358                        character.  */
 3359                     cur_char_ring[work_ring_idx] = 1;
 3360                     break;
 3361                 } else if (mbclen == (size_t)-2) {
 3362                     /* It is not a complete multibyte character.  */
 3363                     cur_char_ring[work_ring_idx] = idx + 1;
 3364                 } else {
 3365                     /* mbclen > 1 */
 3366                     cur_char_ring[work_ring_idx] = mbclen;
 3367                     break;
 3368                 }
 3369                 work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
 3370                     0 : work_ring_idx + 1;
 3371             }
 3372             cur_mbstate = tmp_state;
 3373 
 3374             /* Put a mark on the position on which we write next character.  */
 3375             work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
 3376                 0 : work_ring_idx + 1;
 3377             cur_char_ring[work_ring_idx] = 0;
 3378         }
 3379         if (check_for_bad || *lexptr == '\0')
 3380             check_bad_char(*lexptr);
 3381 
 3382         return (int) (unsigned char) *lexptr++;
 3383     } else {
 3384         do {
 3385 #ifdef NO_CONTINUE_SOURCE_STRINGS
 3386             if (lexeof)
 3387                 return END_FILE;
 3388 #else
 3389             if (lexeof) {
 3390                 if (sourcefile->next == srcfiles)
 3391                     return END_FILE;
 3392                 else
 3393                     next_sourcefile();
 3394             }
 3395 #endif
 3396             if (lexptr && lexptr < lexend) {
 3397                 if (check_for_bad || *lexptr == '\0')
 3398                     check_bad_char(*lexptr);
 3399                 return ((int) (unsigned char) *lexptr++);
 3400             }
 3401         } while (get_src_buf());
 3402         return END_SRC;
 3403     }
 3404 }
 3405 #undef NO_CONTINUE_SOURCE_STRINGS
 3406 
 3407 /* pushback --- push a character back on the input */
 3408 
 3409 static inline void
 3410 pushback(void)
 3411 {
 3412     if (gawk_mb_cur_max > 1)
 3413         cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
 3414             cur_ring_idx - 1;
 3415     (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
 3416 }
 3417 
 3418 /*
 3419  * get_comment --- collect comment text.
 3420  *  Flag = EOL_COMMENT for end-of-line comments.
 3421  *  Flag = BLOCK_COMMENT for self-contained comments.
 3422  */
 3423 
 3424 static int
 3425 get_comment(enum commenttype flag, INSTRUCTION **comment_instruction)
 3426 {
 3427     int c;
 3428     int sl;
 3429     char *p1;
 3430     char *p2;
 3431 
 3432     tok = tokstart;
 3433     tokadd('#');
 3434     sl = sourceline;
 3435 
 3436     while (true) {
 3437         while ((c = nextc(false)) != '\n' && c != END_FILE) {
 3438             /* ignore \r characters */
 3439             if (c != '\r')
 3440                 tokadd(c);
 3441         }
 3442         if (flag == EOL_COMMENT) {
 3443             /* comment at end of line.  */
 3444             if (c == '\n')
 3445                 tokadd(c);
 3446             break;
 3447         }
 3448         if (c == '\n') {
 3449             tokadd(c);
 3450             sourceline++;
 3451             do {
 3452                 c = nextc(false);
 3453                 if (c == '\n') {
 3454                     sourceline++;
 3455                     tokadd(c);
 3456                 }
 3457             } while (isspace(c) && c != END_FILE);
 3458             if (c == END_FILE)
 3459                 break;
 3460             else if (c != '#') {
 3461                 pushback();
 3462                 sourceline--;
 3463                 break;
 3464             } else
 3465                 tokadd(c);
 3466         } else
 3467             break;
 3468     }
 3469 
 3470     /* remove any trailing blank lines (consecutive \n) from comment */
 3471     p1 = tok - 1;
 3472     p2 = tok - 2;
 3473     while (*p1 == '\n' && *p2 == '\n') {
 3474         p1--;
 3475         p2--;
 3476         tok--;
 3477     }
 3478 
 3479     (*comment_instruction) = bcalloc(Op_comment, 1, sl);
 3480     (*comment_instruction)->source_file = source;
 3481     (*comment_instruction)->memory = make_str_node(tokstart, tok - tokstart, 0);
 3482     (*comment_instruction)->memory->comment_type = flag;
 3483 
 3484     return c;
 3485 }
 3486 
 3487 /* allow_newline --- allow newline after &&, ||, ? and : */
 3488 
 3489 static void
 3490 allow_newline(INSTRUCTION **new_comment)
 3491 {
 3492     int c;
 3493 
 3494     for (;;) {
 3495         c = nextc(true);
 3496         if (c == END_FILE) {
 3497             pushback();
 3498             break;
 3499         }
 3500         if (c == '#') {
 3501             if (do_pretty_print && ! do_profile) {
 3502                 /* collect comment byte code iff doing pretty print but not profiling.  */
 3503                 c = get_comment(EOL_COMMENT, new_comment);
 3504             } else {
 3505                 while ((c = nextc(false)) != '\n' && c != END_FILE)
 3506                     continue;
 3507             }
 3508             if (c == END_FILE) {
 3509                 pushback();
 3510                 break;
 3511             }
 3512         }
 3513         if (c == '\n')
 3514             sourceline++;
 3515         if (! isspace(c)) {
 3516             pushback();
 3517             break;
 3518         }
 3519     }
 3520 }
 3521 
 3522 /* newline_eof --- return newline or EOF as needed and adjust variables */
 3523 
 3524 /*
 3525  * This routine used to be a macro, however GCC 4.6.2 warned about
 3526  * the result of a computation not being used.  Converting to a function
 3527  * removes the warnings.
 3528  */
 3529 
 3530 static int
 3531 newline_eof()
 3532 {
 3533     /* NB: a newline at end does not start a source line. */
 3534     if (lasttok != NEWLINE) {
 3535                 pushback();
 3536         if (do_lint && ! eof_warned) {
 3537                 lintwarn(_("source file does not end in newline"));
 3538             eof_warned = true;
 3539         }
 3540         sourceline++;
 3541         return NEWLINE;
 3542     }
 3543 
 3544     sourceline--;
 3545     eof_warned = false;
 3546     return LEX_EOF;
 3547 }
 3548 
 3549 /* yylex --- Read the input and turn it into tokens. */
 3550 
 3551 static int
 3552 #ifdef USE_EBCDIC
 3553 yylex_ebcdic(void)
 3554 #else
 3555 yylex(void)
 3556 #endif
 3557 {
 3558     int c;
 3559     bool seen_e = false;        /* These are for numbers */
 3560     bool seen_point = false;
 3561     bool esc_seen;      /* for literal strings */
 3562     int mid;
 3563     int base;
 3564     static bool did_newline = false;
 3565     char *tokkey;
 3566     bool inhex = false;
 3567     bool intlstr = false;
 3568     AWKNUM d;
 3569     bool collecting_typed_regexp = false;
 3570     static int qm_col_count = 0;
 3571 
 3572 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
 3573 
 3574 #define NEWLINE_EOF newline_eof()
 3575 
 3576     yylval = (INSTRUCTION *) NULL;
 3577     if (lasttok == SUBSCRIPT) {
 3578         lasttok = 0;
 3579         return SUBSCRIPT;
 3580     }
 3581 
 3582     if (lasttok == LEX_EOF)     /* error earlier in current source, must give up !! */
 3583         return 0;
 3584 
 3585     c = nextc(! want_regexp);
 3586     if (c == END_SRC)
 3587         return 0;
 3588     if (c == END_FILE)
 3589         return lasttok = NEWLINE_EOF;
 3590     pushback();
 3591 
 3592 #if defined __EMX__
 3593     /*
 3594      * added for OS/2's extproc feature of cmd.exe
 3595      * (like #! in BSD sh)
 3596      */
 3597     if (strncasecmp(lexptr, "extproc ", 8) == 0) {
 3598         while (*lexptr && *lexptr != '\n')
 3599             lexptr++;
 3600     }
 3601 #endif
 3602 
 3603     lexeme = lexptr;
 3604     thisline = NULL;
 3605 
 3606 collect_regexp:
 3607     if (want_regexp) {
 3608         int in_brack = 0;   /* count brackets, [[:alnum:]] allowed */
 3609         int b_index = -1;
 3610         int cur_index = 0;
 3611 
 3612         /*
 3613          * Here is what's ok with brackets:
 3614          *
 3615          * [..[..] []] [^]] [.../...]
 3616          * [...\[...] [...\]...] [...\/...]
 3617          *
 3618          * (Remember that all of the above are inside /.../)
 3619          *
 3620          * The code for \ handles \[, \] and \/.
 3621          *
 3622          * Otherwise, track the first open [ position, and if
 3623          * an embedded ] occurs, allow it to pass through
 3624          * if it's right after the first [ or after [^.
 3625          *
 3626          * Whew!
 3627          */
 3628 
 3629         want_regexp = false;
 3630         tok = tokstart;
 3631         for (;;) {
 3632             c = nextc(false);
 3633 
 3634             cur_index = tok - tokstart;
 3635             if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
 3636             case '[':
 3637                 if (nextc(false) == ':' || in_brack == 0) {
 3638                     in_brack++;
 3639                     if (in_brack == 1)
 3640                         b_index = tok - tokstart;
 3641                 }
 3642                 pushback();
 3643                 break;
 3644             case ']':
 3645                 if (in_brack > 0
 3646                     && (cur_index == b_index + 1
 3647                     || (cur_index == b_index + 2 && tok[-1] == '^')))
 3648                     ; /* do nothing */
 3649                 else {
 3650                     in_brack--;
 3651                     if (in_brack == 0)
 3652                         b_index = -1;
 3653                 }
 3654                 break;
 3655             case '\\':
 3656                 if ((c = nextc(false)) == END_FILE) {
 3657                     pushback();
 3658                     yyerror(_("unterminated regexp ends with `\\' at end of file"));
 3659                     goto end_regexp; /* kludge */
 3660                 }
 3661                 if (c == '\r')  /* allow MS-DOS files. bleah */
 3662                     c = nextc(true);
 3663                 if (c == '\n') {
 3664                     sourceline++;
 3665                     continue;
 3666                 } else {
 3667                     tokadd('\\');
 3668                     tokadd(c);
 3669                     continue;
 3670                 }
 3671                 break;
 3672             case '/':   /* end of the regexp */
 3673                 if (in_brack > 0)
 3674                     break;
 3675 end_regexp:
 3676                 yylval = GET_INSTRUCTION(Op_token);
 3677                 yylval->lextok = estrdup(tokstart, tok - tokstart);
 3678                 if (do_lint) {
 3679                     int peek = nextc(true);
 3680 
 3681                     pushback();
 3682                     if (peek == 'i' || peek == 's') {
 3683                         if (source)
 3684                             lintwarn(
 3685                         _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
 3686                                 source, sourceline, peek);
 3687                         else
 3688                             lintwarn(
 3689                         _("tawk regex modifier `/.../%c' doesn't work in gawk"),
 3690                                 peek);
 3691                     }
 3692                 }
 3693                 if (collecting_typed_regexp) {
 3694                     collecting_typed_regexp = false;
 3695                     lasttok = TYPED_REGEXP;
 3696                 } else
 3697                     lasttok = REGEXP;
 3698 
 3699                 return lasttok;
 3700             case '\n':
 3701                 pushback();
 3702                 yyerror(_("unterminated regexp"));
 3703                 goto end_regexp;    /* kludge */
 3704             case END_FILE:
 3705                 pushback();
 3706                 yyerror(_("unterminated regexp at end of file"));
 3707                 goto end_regexp;    /* kludge */
 3708             }
 3709             tokadd(c);
 3710         }
 3711     }
 3712 retry:
 3713 
 3714     /* skipping \r is a hack, but windows is just too pervasive. sigh. */
 3715     while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
 3716         continue;
 3717 
 3718     lexeme = lexptr ? lexptr - 1 : lexptr;
 3719     thisline = NULL;
 3720     tok = tokstart;
 3721 
 3722     if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
 3723     switch (c) {
 3724     case END_SRC:
 3725         return 0;
 3726 
 3727     case END_FILE:
 3728         return lasttok = NEWLINE_EOF;
 3729 
 3730     case '\n':
 3731         sourceline++;
 3732         return lasttok = NEWLINE;
 3733 
 3734     case '#':       /* it's a comment */
 3735         yylval = NULL;
 3736         if (do_pretty_print && ! do_profile) {
 3737             /*
 3738              * Collect comment byte code iff doing pretty print
 3739              * but not profiling.
 3740              */
 3741             INSTRUCTION *new_comment;
 3742 
 3743             if (lasttok == NEWLINE || lasttok == 0)
 3744                 c = get_comment(BLOCK_COMMENT, & new_comment);
 3745             else
 3746                 c = get_comment(EOL_COMMENT, & new_comment);
 3747 
 3748             yylval = new_comment;
 3749 
 3750             if (c == END_FILE) {
 3751                 pushback();
 3752                 return lasttok = NEWLINE;
 3753             }
 3754         } else {
 3755             while ((c = nextc(false)) != '\n') {
 3756                 if (c == END_FILE)
 3757                     return lasttok = NEWLINE_EOF;
 3758             }
 3759         }
 3760         sourceline++;
 3761         return lasttok = NEWLINE;
 3762 
 3763     case '@':
 3764         c = nextc(true);
 3765         if (c == '/') {
 3766             want_regexp = true;
 3767             collecting_typed_regexp = true;
 3768             goto collect_regexp;
 3769         }
 3770         pushback();
 3771         at_seen = true;
 3772         return lasttok = '@';
 3773 
 3774     case '\\':
 3775 #ifdef RELAXED_CONTINUATION
 3776         /*
 3777          * This code purports to allow comments and/or whitespace
 3778          * after the `\' at the end of a line used for continuation.
 3779          * Use it at your own risk. We think it's a bad idea, which
 3780          * is why it's not on by default.
 3781          */
 3782         yylval = NULL;
 3783         if (! do_traditional) {
 3784             INSTRUCTION *new_comment;
 3785 
 3786             /* strip trailing white-space and/or comment */
 3787             while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
 3788                 continue;
 3789             if (c == '#') {
 3790                 static bool warned = false;
 3791 
 3792                 if (do_lint && ! warned) {
 3793                     warned = true;
 3794                     lintwarn(
 3795         _("use of `\\ #...' line continuation is not portable"));
 3796                 }
 3797                 if (do_pretty_print && ! do_profile) {
 3798                     c = get_comment(EOL_COMMENT, & new_comment);
 3799                     yylval = new_comment;
 3800                     return lasttok = c;
 3801                 } else {
 3802                     while ((c = nextc(false)) != '\n')
 3803                         if (c == END_FILE)
 3804                             break;
 3805                 }
 3806             }
 3807             pushback();
 3808         }
 3809 #endif /* RELAXED_CONTINUATION */
 3810         c = nextc(true);
 3811         if (c == '\r')  /* allow MS-DOS files. bleah */
 3812             c = nextc(true);
 3813         if (c == '\n') {
 3814             sourceline++;
 3815             goto retry;
 3816         } else {
 3817             yyerror(_("backslash not last character on line"));
 3818             return lasttok = LEX_EOF;
 3819         }
 3820         break;
 3821 
 3822     case '?':
 3823         qm_col_count++;
 3824         // fall through
 3825     case ':':
 3826         yylval = GET_INSTRUCTION(Op_cond_exp);
 3827         if (qm_col_count > 0) {
 3828             if (! do_posix) {
 3829                 INSTRUCTION *new_comment = NULL;
 3830                 allow_newline(& new_comment);
 3831                 yylval->comment = new_comment;
 3832             }
 3833             if (c == ':')
 3834                 qm_col_count--;
 3835         }
 3836         return lasttok = c;
 3837 
 3838         /*
 3839          * in_parens is undefined unless we are parsing a print
 3840          * statement (in_print), but why bother with a check?
 3841          */
 3842     case ')':
 3843         in_parens--;
 3844         return lasttok = c;
 3845 
 3846     case '(':
 3847         in_parens++;
 3848         return lasttok = c;
 3849     case '$':
 3850         yylval = GET_INSTRUCTION(Op_field_spec);
 3851         return lasttok = c;
 3852     case '{':
 3853         if (++in_braces == 1)
 3854             firstline = sourceline;
 3855     case ';':
 3856     case ',':
 3857     case '[':
 3858             return lasttok = c;
 3859     case ']':
 3860         c = nextc(true);
 3861         pushback();
 3862         if (c == '[') {
 3863             if (do_traditional)
 3864                 fatal(_("multidimensional arrays are a gawk extension"));
 3865             if (do_lint_extensions)
 3866                 lintwarn(_("multidimensional arrays are a gawk extension"));
 3867             yylval = GET_INSTRUCTION(Op_sub_array);
 3868             lasttok = ']';
 3869         } else {
 3870             yylval = GET_INSTRUCTION(Op_subscript);
 3871             lasttok = SUBSCRIPT;    /* end of subscripts */
 3872         }
 3873         return ']';
 3874 
 3875     case '*':
 3876         if ((c = nextc(true)) == '=') {
 3877             yylval = GET_INSTRUCTION(Op_assign_times);
 3878             return lasttok = ASSIGNOP;
 3879         } else if (do_posix) {
 3880             pushback();
 3881             yylval = GET_INSTRUCTION(Op_times);
 3882             return lasttok = '*';
 3883         } else if (c == '*') {
 3884             /* make ** and **= aliases for ^ and ^= */
 3885             static bool did_warn_op = false, did_warn_assgn = false;
 3886 
 3887             if (nextc(true) == '=') {
 3888                 if (! did_warn_assgn) {
 3889                     did_warn_assgn = true;
 3890                     if (do_lint)
 3891                         lintwarn(_("POSIX does not allow operator `%s'"), "**=");
 3892                     if (do_lint_old)
 3893                         lintwarn(_("operator `%s' is not supported in old awk"), "**=");
 3894                 }
 3895                 yylval = GET_INSTRUCTION(Op_assign_exp);
 3896                 return ASSIGNOP;
 3897             } else {
 3898                 pushback();
 3899                 if (! did_warn_op) {
 3900                     did_warn_op = true;
 3901                     if (do_lint)
 3902                         lintwarn(_("POSIX does not allow operator `%s'"), "**");
 3903                     if (do_lint_old)
 3904                         lintwarn(_("operator `%s' is not supported in old awk"), "**");
 3905                 }
 3906                 yylval = GET_INSTRUCTION(Op_exp);
 3907                 return lasttok = '^';
 3908             }
 3909         }
 3910         pushback();
 3911         yylval = GET_INSTRUCTION(Op_times);
 3912         return lasttok = '*';
 3913 
 3914     case '/':
 3915         if (nextc(false) == '=') {
 3916             pushback();
 3917             return lasttok = SLASH_BEFORE_EQUAL;
 3918         }
 3919         pushback();
 3920         yylval = GET_INSTRUCTION(Op_quotient);
 3921         return lasttok = '/';
 3922 
 3923     case '%':
 3924         if (nextc(true) == '=') {
 3925             yylval = GET_INSTRUCTION(Op_assign_mod);
 3926             return lasttok = ASSIGNOP;
 3927         }
 3928         pushback();
 3929         yylval = GET_INSTRUCTION(Op_mod);
 3930         return lasttok = '%';
 3931 
 3932     case '^':
 3933     {
 3934         static bool did_warn_op = false, did_warn_assgn = false;
 3935 
 3936         if (nextc(true) == '=') {
 3937             if (do_lint_old && ! did_warn_assgn) {
 3938                 did_warn_assgn = true;
 3939                 lintwarn(_("operator `%s' is not supported in old awk"), "^=");
 3940             }
 3941             yylval = GET_INSTRUCTION(Op_assign_exp);
 3942             return lasttok = ASSIGNOP;
 3943         }
 3944         pushback();
 3945         if (do_lint_old && ! did_warn_op) {
 3946             did_warn_op = true;
 3947             lintwarn(_("operator `%s' is not supported in old awk"), "^");
 3948         }
 3949         yylval = GET_INSTRUCTION(Op_exp);
 3950         return lasttok = '^';
 3951     }
 3952 
 3953     case '+':
 3954         if ((c = nextc(true)) == '=') {
 3955             yylval = GET_INSTRUCTION(Op_assign_plus);
 3956             return lasttok = ASSIGNOP;
 3957         }
 3958         if (c == '+') {
 3959             yylval = GET_INSTRUCTION(Op_symbol);
 3960             return lasttok = INCREMENT;
 3961         }
 3962         pushback();
 3963         yylval = GET_INSTRUCTION(Op_plus);
 3964         return lasttok = '+';
 3965 
 3966     case '!':
 3967         if ((c = nextc(true)) == '=') {
 3968             yylval = GET_INSTRUCTION(Op_notequal);
 3969             return lasttok = RELOP;
 3970         }
 3971         if (c == '~') {
 3972             yylval = GET_INSTRUCTION(Op_nomatch);
 3973             return lasttok = MATCHOP;
 3974         }
 3975         pushback();
 3976         yylval = GET_INSTRUCTION(Op_symbol);
 3977         return lasttok = '!';
 3978 
 3979     case '<':
 3980         if (nextc(true) == '=') {
 3981             yylval = GET_INSTRUCTION(Op_leq);
 3982             return lasttok = RELOP;
 3983         }
 3984         yylval = GET_INSTRUCTION(Op_less);
 3985         pushback();
 3986         return lasttok = '<';
 3987 
 3988     case '=':
 3989         if (nextc(true) == '=') {
 3990             yylval = GET_INSTRUCTION(Op_equal);
 3991             return lasttok = RELOP;
 3992         }
 3993         yylval = GET_INSTRUCTION(Op_assign);
 3994         pushback();
 3995         return lasttok = ASSIGN;
 3996 
 3997     case '>':
 3998         if ((c = nextc(true)) == '=') {
 3999             yylval = GET_INSTRUCTION(Op_geq);
 4000             return lasttok = RELOP;
 4001         } else if (c == '>') {
 4002             yylval = GET_INSTRUCTION(Op_symbol);
 4003             yylval->redir_type = redirect_append;
 4004             return lasttok = IO_OUT;
 4005         }
 4006         pushback();
 4007         if (in_print && in_parens == 0) {
 4008             yylval = GET_INSTRUCTION(Op_symbol);
 4009             yylval->redir_type = redirect_output;
 4010             return lasttok = IO_OUT;
 4011         }
 4012         yylval = GET_INSTRUCTION(Op_greater);
 4013         return lasttok = '>';
 4014 
 4015     case '~':
 4016         yylval = GET_INSTRUCTION(Op_match);
 4017         return lasttok = MATCHOP;
 4018 
 4019     case '}':
 4020         /*
 4021          * Added did newline stuff.  Easier than
 4022          * hacking the grammar.
 4023          */
 4024         if (did_newline) {
 4025             did_newline = false;
 4026             if (--in_braces == 0)
 4027                 lastline = sourceline;
 4028             return lasttok = c;
 4029         }
 4030         did_newline = true;
 4031         --lexptr;   /* pick up } next time */
 4032         return lasttok = NEWLINE;
 4033 
 4034     case '"':
 4035     string:
 4036         esc_seen = false;
 4037         /*
 4038          * Allow any kind of junk in quoted string,
 4039          * so pass false to nextc().
 4040          */
 4041         while ((c = nextc(false)) != '"') {
 4042             if (c == '\n') {
 4043                 pushback();
 4044                 yyerror(_("unterminated string"));
 4045                 return lasttok = LEX_EOF;
 4046             }
 4047             if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
 4048                 c == '\\') {
 4049                 c = nextc(true);
 4050                 if (c == '\r')  /* allow MS-DOS files. bleah */
 4051                     c = nextc(true);
 4052                 if (c == '\n') {
 4053                     if (do_posix)
 4054                         fatal(_("POSIX does not allow physical newlines in string values"));
 4055                     else if (do_lint)
 4056                         lintwarn(_("backslash string continuation is not portable"));
 4057                     sourceline++;
 4058                     continue;
 4059                 }
 4060                 esc_seen = true;
 4061                 if (! want_source || c != '"')
 4062                     tokadd('\\');
 4063             }
 4064             if (c == END_FILE) {
 4065                 pushback();
 4066                 yyerror(_("unterminated string"));
 4067                 return lasttok = LEX_EOF;
 4068             }
 4069             tokadd(c);
 4070         }
 4071         yylval = GET_INSTRUCTION(Op_token);
 4072         if (want_source) {
 4073             yylval->lextok = estrdup(tokstart, tok - tokstart);
 4074             return lasttok = FILENAME;
 4075         }
 4076 
 4077         yylval->opcode = Op_push_i;
 4078         yylval->memory = make_str_node(tokstart,
 4079                     tok - tokstart, esc_seen ? SCAN : 0);
 4080         if (intlstr) {
 4081             yylval->memory->flags |= INTLSTR;
 4082             intlstr = false;
 4083             if (do_intl)
 4084                 dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
 4085         }
 4086         return lasttok = YSTRING;
 4087 
 4088     case '-':
 4089         if ((c = nextc(true)) == '=') {
 4090             yylval = GET_INSTRUCTION(Op_assign_minus);
 4091             return lasttok = ASSIGNOP;
 4092         }
 4093         if (c == '-') {
 4094             yylval = GET_INSTRUCTION(Op_symbol);
 4095             return lasttok = DECREMENT;
 4096         }
 4097         pushback();
 4098         yylval = GET_INSTRUCTION(Op_minus);
 4099         return lasttok = '-';
 4100 
 4101     case '.':
 4102         c = nextc(true);
 4103         pushback();
 4104         if (! isdigit(c))
 4105             return lasttok = '.';
 4106         else
 4107             c = '.';
 4108         /* FALL THROUGH */
 4109     case '0':
 4110     case '1':
 4111     case '2':
 4112     case '3':
 4113     case '4':
 4114     case '5':
 4115     case '6':
 4116     case '7':
 4117     case '8':
 4118     case '9':
 4119         /* It's a number */
 4120         for (;;) {
 4121             bool gotnumber = false;
 4122 
 4123             tokadd(c);
 4124             switch (c) {
 4125             case 'x':
 4126             case 'X':
 4127                 if (do_traditional)
 4128                     goto done;
 4129                 if (tok == tokstart + 2) {
 4130                     int peek = nextc(true);
 4131 
 4132                     if (isxdigit(peek)) {
 4133                         inhex = true;
 4134                         pushback(); /* following digit */
 4135                     } else {
 4136                         pushback(); /* x or X */
 4137                         goto done;
 4138                     }
 4139                 }
 4140                 break;
 4141             case '.':
 4142                 /* period ends exponent part of floating point number */
 4143                 if (seen_point || seen_e) {
 4144                     gotnumber = true;
 4145                     break;
 4146                 }
 4147                 seen_point = true;
 4148                 break;
 4149             case 'e':
 4150             case 'E':
 4151                 if (inhex)
 4152                     break;
 4153                 if (seen_e) {
 4154                     gotnumber = true;
 4155                     break;
 4156                 }
 4157                 seen_e = true;
 4158                 if ((c = nextc(true)) == '-' || c == '+') {
 4159                     int c2 = nextc(true);
 4160 
 4161                     if (isdigit(c2)) {
 4162                         tokadd(c);
 4163                         tokadd(c2);
 4164                     } else {
 4165                         pushback(); /* non-digit after + or - */
 4166                         pushback(); /* + or - */
 4167                         pushback(); /* e or E */
 4168                     }
 4169                 } else if (! isdigit(c)) {
 4170                     pushback(); /* character after e or E */
 4171                     pushback(); /* e or E */
 4172                 } else {
 4173                     pushback(); /* digit */
 4174                 }
 4175                 break;
 4176             case 'a':
 4177             case 'A':
 4178             case 'b':
 4179             case 'B':
 4180             case 'c':
 4181             case 'C':
 4182             case 'D':
 4183             case 'd':
 4184             case 'f':
 4185             case 'F':
 4186                 if (do_traditional || ! inhex)
 4187                     goto done;
 4188                 /* fall through */
 4189             case '0':
 4190             case '1':
 4191             case '2':
 4192             case '3':
 4193             case '4':
 4194             case '5':
 4195             case '6':
 4196             case '7':
 4197             case '8':
 4198             case '9':
 4199                 break;
 4200             default:
 4201             done:
 4202                 gotnumber = true;
 4203             }
 4204             if (gotnumber)
 4205                 break;
 4206             c = nextc(true);
 4207         }
 4208         pushback();
 4209 
 4210         tokadd('\0');
 4211         yylval = GET_INSTRUCTION(Op_push_i);
 4212 
 4213         base = 10;
 4214         if (! do_traditional) {
 4215             base = get_numbase(tokstart, strlen(tokstart)-1, false);
 4216             if (do_lint) {
 4217                 if (base == 8)
 4218                     lintwarn("numeric constant `%.*s' treated as octal",
 4219                         (int) strlen(tokstart)-1, tokstart);
 4220                 else if (base == 16)
 4221                     lintwarn("numeric constant `%.*s' treated as hexadecimal",
 4222                         (int) strlen(tokstart)-1, tokstart);
 4223             }
 4224         }
 4225 
 4226 #ifdef HAVE_MPFR
 4227         if (do_mpfr) {
 4228             NODE *r;
 4229 
 4230             if (! seen_point && ! seen_e) {
 4231                 r = mpg_integer();
 4232                 mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base);
 4233                 errno = 0;
 4234             } else {
 4235                 int tval;
 4236                 r = mpg_float();
 4237                 tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE);
 4238                 errno = 0;
 4239                 IEEE_FMT(r->mpg_numbr, tval);
 4240             }
 4241             yylval->memory = set_profile_text(r, tokstart, strlen(tokstart)-1);
 4242             return lasttok = YNUMBER;
 4243         }
 4244 #endif
 4245         if (base != 10)
 4246             d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL);
 4247         else
 4248             d = atof(tokstart);
 4249         yylval->memory = set_profile_text(make_number(d), tokstart, strlen(tokstart) - 1);
 4250         if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d)
 4251             yylval->memory->flags |= NUMINT;
 4252         return lasttok = YNUMBER;
 4253 
 4254     case '&':
 4255         if ((c = nextc(true)) == '&') {
 4256             yylval = GET_INSTRUCTION(Op_and);
 4257             INSTRUCTION *new_comment = NULL;
 4258             allow_newline(& new_comment);
 4259             yylval->comment = new_comment;
 4260 
 4261             return lasttok = LEX_AND;
 4262         }
 4263         pushback();
 4264         yylval = GET_INSTRUCTION(Op_symbol);
 4265         return lasttok = '&';
 4266 
 4267     case '|':
 4268         if ((c = nextc(true)) == '|') {
 4269             yylval = GET_INSTRUCTION(Op_or);
 4270             INSTRUCTION *new_comment = NULL;
 4271             allow_newline(& new_comment);
 4272             yylval->comment = new_comment;
 4273 
 4274             return lasttok = LEX_OR;
 4275         } else if (! do_traditional && c == '&') {
 4276             yylval = GET_INSTRUCTION(Op_symbol);
 4277             yylval->redir_type = redirect_twoway;
 4278 
 4279             return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
 4280         }
 4281         pushback();
 4282         if (in_print && in_parens == 0) {
 4283             yylval = GET_INSTRUCTION(Op_symbol);
 4284             yylval->redir_type = redirect_pipe;
 4285             return lasttok = IO_OUT;
 4286         } else {
 4287             yylval = GET_INSTRUCTION(Op_symbol);
 4288             yylval->redir_type = redirect_pipein;
 4289             return lasttok = IO_IN;
 4290         }
 4291     }
 4292 
 4293     if (! is_letter(c)) {
 4294         yyerror(_("invalid char '%c' in expression"), c);
 4295         return lasttok = LEX_EOF;
 4296     }
 4297 
 4298     /*
 4299      * Lots of fog here.  Consider:
 4300      *
 4301      * print "xyzzy"$_"foo"
 4302      *
 4303      * Without the check for ` lasttok != '$' ', this is parsed as
 4304      *
 4305      * print "xxyzz" $(_"foo")
 4306      *
 4307      * With the check, it is "correctly" parsed as three
 4308      * string concatenations.  Sigh.  This seems to be
 4309      * "more correct", but this is definitely one of those
 4310      * occasions where the interactions are funny.
 4311      */
 4312     if (! do_traditional && c == '_' && lasttok != '$') {
 4313         if ((c = nextc(true)) == '"') {
 4314             intlstr = true;
 4315             goto string;
 4316         }
 4317         pushback();
 4318         c = '_';
 4319     }
 4320 
 4321     /* it's some type of name-type-thing.  Find its length. */
 4322     tok = tokstart;
 4323     while (c != END_FILE && is_identchar(c)) {
 4324         tokadd(c);
 4325         c = nextc(true);
 4326 
 4327         if (! do_traditional && c == ':') {
 4328             int peek = nextc(true);
 4329 
 4330             if (peek == ':') {  // saw identifier::
 4331                 tokadd(c);
 4332                 tokadd(c);
 4333                 c = nextc(true);
 4334             } else
 4335                 pushback();
 4336                 // then continue around the loop, c == ':'
 4337         }
 4338     }
 4339     tokadd('\0');
 4340     pushback();
 4341 
 4342     (void) validate_qualified_name(tokstart);
 4343 
 4344     /* See if it is a special token. */
 4345     if ((mid = check_qualified_special(tokstart)) >= 0) {
 4346         static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
 4347         int class = tokentab[mid].class;
 4348 
 4349         switch (class) {
 4350         case LEX_EVAL:
 4351         case LEX_INCLUDE:
 4352         case LEX_LOAD:
 4353         case LEX_NAMESPACE:
 4354             if (lasttok != '@')
 4355                 goto out;
 4356         default:
 4357             break;
 4358         }
 4359 
 4360         /* allow parameter names to shadow the names of gawk extension built-ins */
 4361         if ((tokentab[mid].flags & GAWKX) != 0) {
 4362             NODE *f;
 4363 
 4364             switch (want_param_names) {
 4365             case FUNC_HEADER:
 4366                 /* in header, defining parameter names */
 4367                 goto out;
 4368             case FUNC_BODY:
 4369                 /* in body, name must be in symbol table for it to be a parameter */
 4370                 if ((f = lookup(tokstart)) != NULL) {
 4371                     if (f->type == Node_builtin_func)
 4372                         break;
 4373                     else
 4374                         goto out;
 4375                 }
 4376                 /* else
 4377                     fall through */
 4378             case DONT_CHECK:
 4379                 /* regular code */
 4380                 break;
 4381             default:
 4382                 cant_happen();
 4383                 break;
 4384             }
 4385         }
 4386 
 4387         if (do_lint) {
 4388             if (do_lint_extensions && (tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) {
 4389                 lintwarn(_("`%s' is a gawk extension"),
 4390                     tokentab[mid].operator);
 4391                 warntab[mid] |= GAWKX;
 4392             }
 4393             if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) {
 4394                 lintwarn(_("POSIX does not allow `%s'"),
 4395                     tokentab[mid].operator);
 4396                 warntab[mid] |= NOT_POSIX;
 4397             }
 4398         }
 4399         if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0
 4400                  && (warntab[mid] & NOT_OLD) == 0
 4401         ) {
 4402             lintwarn(_("`%s' is not supported in old awk"),
 4403                     tokentab[mid].operator);
 4404             warntab[mid] |= NOT_OLD;
 4405         }
 4406 
 4407         if ((tokentab[mid].flags & BREAK) != 0)
 4408             break_allowed++;
 4409         if ((tokentab[mid].flags & CONTINUE) != 0)
 4410             continue_allowed++;
 4411 
 4412         switch (class) {
 4413         case LEX_NAMESPACE:
 4414         case LEX_INCLUDE:
 4415         case LEX_LOAD:
 4416             want_source = true;
 4417             break;
 4418         case LEX_EVAL:
 4419             if (in_main_context())
 4420                 goto out;
 4421             emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
 4422             tokkey[0] = '@';
 4423             memcpy(tokkey + 1, tokstart, tok - tokstart);
 4424             yylval = GET_INSTRUCTION(Op_token);
 4425             yylval->lextok = tokkey;
 4426             break;
 4427 
 4428         case LEX_FUNCTION:
 4429         case LEX_BEGIN:
 4430         case LEX_END:
 4431         case LEX_BEGINFILE:
 4432         case LEX_ENDFILE:
 4433             yylval = bcalloc(tokentab[mid].value, 4, sourceline);
 4434             break;
 4435 
 4436         case LEX_FOR:
 4437         case LEX_WHILE:
 4438         case LEX_DO:
 4439         case LEX_SWITCH:
 4440             if (! do_pretty_print)
 4441                 return lasttok = class;
 4442             /* fall through */
 4443         case LEX_CASE:
 4444             yylval = bcalloc(tokentab[mid].value, 2, sourceline);
 4445             break;
 4446 
 4447         /*
 4448          * These must be checked here, due to the LALR nature of the parser,
 4449          * the rules for continue and break may not be reduced until after
 4450          * a token that increments the xxx_allowed varibles is seen. Bleah.
 4451          */
 4452         case LEX_CONTINUE:
 4453             if (! continue_allowed) {
 4454                 error_ln(sourceline,
 4455                     _("`continue' is not allowed outside a loop"));
 4456                 errcount++;
 4457             }
 4458             goto make_instruction;
 4459 
 4460         case LEX_BREAK:
 4461             if (! break_allowed) {
 4462                 error_ln(sourceline,
 4463                     _("`break' is not allowed outside a loop or switch"));
 4464                 errcount++;
 4465             }
 4466             goto make_instruction;
 4467 
 4468         default:
 4469 make_instruction:
 4470             yylval = GET_INSTRUCTION(tokentab[mid].value);
 4471             if (class == LEX_BUILTIN || class == LEX_LENGTH)
 4472                 yylval->builtin_idx = mid;
 4473             break;
 4474         }
 4475         return lasttok = class;
 4476     }
 4477 out:
 4478     if (want_param_names == FUNC_HEADER)
 4479         tokkey = estrdup(tokstart, tok - tokstart - 1);
 4480     else
 4481         tokkey = qualify_name(tokstart, tok - tokstart - 1);
 4482 
 4483     if (*lexptr == '(') {
 4484         yylval = bcalloc(Op_token, 2, sourceline);
 4485         yylval->lextok = tokkey;
 4486         return lasttok = FUNC_CALL;
 4487     } else {
 4488         static bool goto_warned = false;
 4489 
 4490         yylval = GET_INSTRUCTION(Op_token);
 4491         yylval->lextok = tokkey;
 4492 
 4493 #define SMART_ALECK 1
 4494         if (SMART_ALECK
 4495             && do_lint
 4496             && ! goto_warned
 4497             && tolower(tokkey[0]) == 'g'
 4498             && strcasecmp(tokkey, "goto") == 0) {
 4499             goto_warned = true;
 4500             lintwarn(_("`goto' considered harmful!"));
 4501         }
 4502         return lasttok = NAME;
 4503     }
 4504 
 4505 #undef GET_INSTRUCTION
 4506 #undef NEWLINE_EOF
 4507 }
 4508 
 4509 /* It's EBCDIC in a Bison grammar, run for the hills!
 4510 
 4511    Or, convert single-character tokens coming out of yylex() from EBCDIC to
 4512    ASCII values on-the-fly so that the parse tables need not be regenerated
 4513    for EBCDIC systems.  */
 4514 #ifdef USE_EBCDIC
 4515 static int
 4516 yylex(void)
 4517 {
 4518     static char etoa_xlate[256];
 4519     static bool do_etoa_init = true;
 4520     int tok;
 4521 
 4522     if (do_etoa_init)
 4523     {
 4524         for (tok = 0; tok < 256; tok++)
 4525             etoa_xlate[tok] = (char) tok;
 4526 #ifdef HAVE___ETOA_L
 4527         /* IBM helpfully provides this function.  */
 4528         __etoa_l(etoa_xlate, sizeof(etoa_xlate));
 4529 #else
 4530 # error "An EBCDIC-to-ASCII translation function is needed for this system"
 4531 #endif
 4532         do_etoa_init = false;
 4533     }
 4534 
 4535     tok = yylex_ebcdic();
 4536 
 4537     if (tok >= 0 && tok <= 0xFF)
 4538         tok = etoa_xlate[tok];
 4539 
 4540     return tok;
 4541 }
 4542 #endif /* USE_EBCDIC */
 4543 
 4544 /* snode --- instructions for builtin functions. Checks for arg. count
 4545              and supplies defaults where possible. */
 4546 
 4547 static INSTRUCTION *
 4548 snode(INSTRUCTION *subn, INSTRUCTION *r)
 4549 {
 4550     INSTRUCTION *arg;
 4551     INSTRUCTION *ip;
 4552     NODE *n;
 4553     int nexp = 0;
 4554     int args_allowed;
 4555     int idx = r->builtin_idx;
 4556 
 4557     if (subn != NULL) {
 4558         INSTRUCTION *tp;
 4559         for (tp = subn->nexti; tp; tp = tp->nexti) {
 4560             tp = tp->lasti;
 4561             nexp++;
 4562         }
 4563         assert(nexp > 0);
 4564     }
 4565 
 4566     /* check against how many args. are allowed for this builtin */
 4567     args_allowed = tokentab[idx].flags & ARGS;
 4568     if (args_allowed && (args_allowed & A(nexp)) == 0) {
 4569         yyerror(_("%d is invalid as number of arguments for %s"),
 4570                 nexp, tokentab[idx].operator);
 4571         return NULL;
 4572     }
 4573 
 4574     /* special processing for sub, gsub and gensub */
 4575 
 4576     if (tokentab[idx].value == Op_sub_builtin) {
 4577         const char *operator = tokentab[idx].operator;
 4578 
 4579         r->sub_flags = 0;
 4580 
 4581         arg = subn->nexti;      /* first arg list */
 4582         (void) mk_rexp(arg);
 4583 
 4584         if (strcmp(operator, "gensub") != 0) {
 4585             /* sub and gsub */
 4586 
 4587             if (strcmp(operator, "gsub") == 0)
 4588                 r->sub_flags |= GSUB;
 4589 
 4590             arg = arg->lasti->nexti;    /* 2nd arg list */
 4591             if (nexp == 2) {
 4592                 INSTRUCTION *expr;
 4593 
 4594                 expr = list_create(instruction(Op_push_i));
 4595                 expr->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
 4596                 (void) mk_expression_list(subn,
 4597                         list_append(expr, instruction(Op_field_spec)));
 4598             }
 4599 
 4600             arg = arg->lasti->nexti;    /* third arg list */
 4601             ip = arg->lasti;
 4602             if (ip->opcode == Op_push_i) {
 4603                 if (do_lint)
 4604                     lintwarn(_("%s: string literal as last argument of substitute has no effect"),
 4605                         operator);
 4606                 r->sub_flags |= LITERAL;
 4607             } else {
 4608                 if (make_assignable(ip) == NULL)
 4609                     yyerror(_("%s third parameter is not a changeable object"),
 4610                         operator);
 4611                 else
 4612                     ip->do_reference = true;
 4613             }
 4614 
 4615             r->expr_count = count_expressions(&subn, false);
 4616             ip = subn->lasti;
 4617 
 4618             (void) list_append(subn, r);
 4619 
 4620             /* add after_assign code */
 4621             if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
 4622                 (void) list_append(subn, instruction(Op_var_assign));
 4623                 subn->lasti->assign_ctxt = Op_sub_builtin;
 4624                 subn->lasti->assign_var = ip->memory->var_assign;
 4625             } else if (ip->opcode == Op_field_spec_lhs) {
 4626                 (void) list_append(subn, instruction(Op_field_assign));
 4627                 subn->lasti->assign_ctxt = Op_sub_builtin;
 4628                 subn->lasti->field_assign = (Func_ptr) 0;
 4629                 ip->target_assign = subn->lasti;
 4630             } else if (ip->opcode == Op_subscript_lhs) {
 4631                 (void) list_append(subn, instruction(Op_subscript_assign));
 4632                 subn->lasti->assign_ctxt = Op_sub_builtin;
 4633             }
 4634 
 4635             return subn;
 4636 
 4637         } else {
 4638             /* gensub */
 4639 
 4640             r->sub_flags |= GENSUB;
 4641             if (nexp == 3) {
 4642                 ip = instruction(Op_push_i);
 4643                 ip->memory = set_profile_text(make_number(0.0), "0", 1);
 4644                 (void) mk_expression_list(subn,
 4645                         list_append(list_create(ip), instruction(Op_field_spec)));
 4646             }
 4647 
 4648             r->expr_count = count_expressions(&subn, false);
 4649             return list_append(subn, r);
 4650         }
 4651     }
 4652 
 4653 #ifdef HAVE_MPFR
 4654     /* N.B.: If necessary, add special processing for alternate builtin, below */
 4655     if (do_mpfr && tokentab[idx].ptr2)
 4656         r->builtin =  tokentab[idx].ptr2;
 4657     else
 4658 #endif
 4659         r->builtin = tokentab[idx].ptr;
 4660 
 4661     /* special case processing for a few builtins */
 4662 
 4663     if (r->builtin == do_length) {
 4664         if (nexp == 0) {
 4665             /* no args. Use $0 */
 4666 
 4667             INSTRUCTION *list;
 4668             r->expr_count = 1;
 4669             list = list_create(r);
 4670             (void) list_prepend(list, instruction(Op_field_spec));
 4671             (void) list_prepend(list, instruction(Op_push_i));
 4672             list->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
 4673             return list;
 4674         } else {
 4675             arg = subn->nexti;
 4676             if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
 4677                 arg->nexti->opcode = Op_push_arg;   /* argument may be array */
 4678         }
 4679     } else if (r->builtin == do_isarray) {
 4680         arg = subn->nexti;
 4681         if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
 4682             arg->nexti->opcode = Op_push_arg_untyped;   /* argument may be untyped */
 4683     } else if (r->builtin == do_typeof) {
 4684         arg = subn->nexti;
 4685         if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
 4686             arg->nexti->opcode = Op_push_arg_untyped;   /* argument may be untyped */
 4687         if (nexp == 2) {    /* 2nd argument there */
 4688             arg = subn->nexti->lasti->nexti;    /* 2nd arg list */
 4689             ip = arg->lasti;
 4690             if (ip->opcode == Op_push)
 4691                 ip->opcode = Op_push_array;
 4692         }
 4693 #ifdef SUPPLY_INTDIV
 4694     } else if (r->builtin == do_intdiv
 4695 #ifdef HAVE_MPFR
 4696            || r->builtin == MPF(intdiv)
 4697 #endif
 4698             ) {
 4699         arg = subn->nexti->lasti->nexti->lasti->nexti;  /* 3rd arg list */
 4700         ip = arg->lasti;
 4701         if (ip->opcode == Op_push)
 4702             ip->opcode = Op_push_array;
 4703 #endif /* SUPPLY_INTDIV */
 4704     } else if (r->builtin == do_match) {
 4705         static bool warned = false;
 4706 
 4707         arg = subn->nexti->lasti->nexti;    /* 2nd arg list */
 4708         (void) mk_rexp(arg);
 4709 
 4710         if (nexp == 3) {    /* 3rd argument there */
 4711             if (do_lint_extensions && ! warned) {
 4712                 warned = true;
 4713                 lintwarn(_("match: third argument is a gawk extension"));
 4714             }
 4715             if (do_traditional) {
 4716                 yyerror(_("match: third argument is a gawk extension"));
 4717                 return NULL;
 4718             }
 4719 
 4720             arg = arg->lasti->nexti;    /* third arg list */
 4721             ip = arg->lasti;
 4722             if (/*ip == arg->nexti  && */ ip->opcode == Op_push)
 4723                 ip->opcode = Op_push_array;
 4724         }
 4725     } else if (r->builtin == do_split) {
 4726         arg = subn->nexti->lasti->nexti;    /* 2nd arg list */
 4727         ip = arg->lasti;
 4728         if (ip->opcode == Op_push)
 4729             ip->opcode = Op_push_array;
 4730         if (nexp == 2) {
 4731             INSTRUCTION *expr;
 4732             expr = list_create(instruction(Op_push));
 4733             expr->nexti->memory = FS_node;
 4734             (void) mk_expression_list(subn, expr);
 4735         }
 4736         arg = arg->lasti->nexti;
 4737         n = mk_rexp(arg);
 4738         if (nexp == 2)
 4739             n->re_flags |= FS_DFLT;
 4740         if (nexp == 4) {
 4741             arg = arg->lasti->nexti;
 4742             ip = arg->lasti;
 4743             if (ip->opcode == Op_push)
 4744                 ip->opcode = Op_push_array;
 4745         }
 4746     } else if (r->builtin == do_patsplit) {
 4747         arg = subn->nexti->lasti->nexti;    /* 2nd arg list */
 4748         ip = arg->lasti;
 4749         if (ip->opcode == Op_push)
 4750             ip->opcode = Op_push_array;
 4751         if (nexp == 2) {
 4752             INSTRUCTION *expr;
 4753             expr = list_create(instruction(Op_push));
 4754             expr->nexti->memory = FPAT_node;
 4755             (void) mk_expression_list(subn, expr);
 4756         }
 4757         arg = arg->lasti->nexti;
 4758         n = mk_rexp(arg);
 4759         if (nexp == 4) {
 4760             arg = arg->lasti->nexti;
 4761             ip = arg->lasti;
 4762             if (ip->opcode == Op_push)
 4763                 ip->opcode = Op_push_array;
 4764         }
 4765     } else if (r->builtin == do_close) {
 4766         static bool warned = false;
 4767         if (nexp == 2) {
 4768             if (do_lint_extensions && ! warned) {
 4769                 warned = true;
 4770                 lintwarn(_("close: second argument is a gawk extension"));
 4771             }
 4772             if (do_traditional) {
 4773                 yyerror(_("close: second argument is a gawk extension"));
 4774                 return NULL;
 4775             }
 4776         }
 4777     } else if (do_intl                  /* --gen-po */
 4778             && r->builtin == do_dcgettext       /* dcgettext(...) */
 4779             && subn->nexti->lasti->opcode == Op_push_i  /* 1st arg is constant */
 4780             && (subn->nexti->lasti->memory->flags & STRING) != 0) { /* it's a string constant */
 4781         /* ala xgettext, dcgettext("some string" ...) dumps the string */
 4782         NODE *str = subn->nexti->lasti->memory;
 4783 
 4784         if ((str->flags & INTLSTR) != 0)
 4785             warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
 4786             /* don't dump it, the lexer already did */
 4787         else
 4788             dumpintlstr(str->stptr, str->stlen);
 4789     } else if (do_intl                  /* --gen-po */
 4790             && r->builtin == do_dcngettext      /* dcngettext(...) */
 4791             && subn->nexti->lasti->opcode == Op_push_i  /* 1st arg is constant */
 4792             && (subn->nexti->lasti->memory->flags & STRING) != 0    /* it's a string constant */
 4793             && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i    /* 2nd arg is constant too */
 4794             && (subn->nexti->lasti->nexti->lasti->memory->flags & STRING) != 0) {   /* it's a string constant */
 4795         /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
 4796         NODE *str1 = subn->nexti->lasti->memory;
 4797         NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
 4798 
 4799         if (((str1->flags | str2->flags) & INTLSTR) != 0)
 4800             warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
 4801         else
 4802             dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
 4803     } else if (r->builtin == do_asort || r->builtin == do_asorti) {
 4804         arg = subn->nexti;  /* 1st arg list */
 4805         ip = arg->lasti;
 4806         if (ip->opcode == Op_push)
 4807             ip->opcode = Op_push_array;
 4808         if (nexp >= 2) {
 4809             arg = ip->nexti;
 4810             ip = arg->lasti;
 4811             if (ip->opcode == Op_push)
 4812                 ip->opcode = Op_push_array;
 4813         }
 4814     }
 4815     else if (r->builtin == do_index) {
 4816         arg = subn->nexti->lasti->nexti;    /* 2nd arg list */
 4817         ip = arg->lasti;
 4818         if (ip->opcode == Op_match_rec || ip->opcode == Op_push_re)
 4819             fatal(_("index: regexp constant as second argument is not allowed"));
 4820     }
 4821 #ifdef ARRAYDEBUG
 4822     else if (r->builtin == do_adump) {
 4823         ip = subn->nexti->lasti;
 4824         if (ip->opcode == Op_push)
 4825             ip->opcode = Op_push_array;
 4826     }
 4827 #endif
 4828 
 4829     if (subn != NULL) {
 4830         r->expr_count = count_expressions(&subn, false);
 4831         return list_append(subn, r);
 4832     }
 4833 
 4834     r->expr_count = 0;
 4835     return list_create(r);
 4836 }
 4837 
 4838 
 4839 /* parms_shadow --- check if parameters shadow globals */
 4840 
 4841 static int
 4842 parms_shadow(INSTRUCTION *pc, bool *shadow)
 4843 {
 4844     int pcount, i;
 4845     bool ret = false;
 4846     NODE *func, *fp;
 4847     char *fname;
 4848 
 4849     func = pc->func_body;
 4850     fname = func->vname;
 4851     fp = func->fparms;
 4852 
 4853 #if 0   /* can't happen, already exited if error ? */
 4854     if (fname == NULL || func == NULL)  /* error earlier */
 4855         return false;
 4856 #endif
 4857 
 4858     pcount = func->param_cnt;
 4859 
 4860     if (pcount == 0)        /* no args, no problem */
 4861         return 0;
 4862 
 4863     source = pc->source_file;
 4864     sourceline = pc->source_line;
 4865     /*
 4866      * Use warning() and not lintwarn() so that can warn
 4867      * about all shadowed parameters.
 4868      */
 4869     for (i = 0; i < pcount; i++) {
 4870         if (lookup(fp[i].param) != NULL) {
 4871             warning(
 4872     _("function `%s': parameter `%s' shadows global variable"),
 4873                     fname, fp[i].param);
 4874             ret = true;
 4875         }
 4876     }
 4877 
 4878     *shadow |= ret;
 4879     return 0;
 4880 }
 4881 
 4882 /* valinfo --- dump var info */
 4883 
 4884 void
 4885 valinfo(NODE *n, Func_print print_func, FILE *fp)
 4886 {
 4887     if (n == Nnull_string)
 4888         print_func(fp, "uninitialized scalar\n");
 4889     else if ((n->flags & REGEX) != 0)
 4890         print_func(fp, "@/%.*s/\n", n->stlen, n->stptr);
 4891     else if ((n->flags & STRING) != 0) {
 4892         pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
 4893         print_func(fp, "\n");
 4894     } else if ((n->flags & NUMBER) != 0) {
 4895 #ifdef HAVE_MPFR
 4896         if (is_mpg_float(n))
 4897             print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
 4898         else if (is_mpg_integer(n))
 4899             print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
 4900         else
 4901 #endif
 4902         print_func(fp, "%.17g\n", n->numbr);
 4903     } else
 4904         print_func(fp, "?? flags %s\n", flags2str(n->flags));
 4905 }
 4906 
 4907 
 4908 /* dump_vars --- dump the symbol table */
 4909 
 4910 void
 4911 dump_vars(const char *fname)
 4912 {
 4913     FILE *fp;
 4914     NODE **vars;
 4915 
 4916     if (fname == NULL)
 4917         fp = stderr;
 4918     else if (strcmp(fname, "-") == 0)
 4919         fp = stdout;
 4920     else if ((fp = fopen(fname, "w")) == NULL) {
 4921         warning(_("could not open `%s' for writing: %s"), fname, strerror(errno));
 4922         warning(_("sending variable list to standard error"));
 4923         fp = stderr;
 4924     }
 4925 
 4926     vars = variable_list();
 4927     print_vars(vars, fprintf, fp);
 4928     efree(vars);
 4929     if (fp != stdout && fp != stderr && fclose(fp) != 0)
 4930         warning(_("%s: close failed: %s"), fname, strerror(errno));
 4931 }
 4932 
 4933 /* dump_funcs --- print all functions */
 4934 
 4935 void
 4936 dump_funcs()
 4937 {
 4938     NODE **funcs;
 4939     funcs = function_list(true);
 4940     (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0);
 4941     efree(funcs);
 4942 }
 4943 
 4944 
 4945 /* shadow_funcs --- check all functions for parameters that shadow globals */
 4946 
 4947 void
 4948 shadow_funcs()
 4949 {
 4950     static int calls = 0;
 4951     bool shadow = false;
 4952     NODE **funcs;
 4953 
 4954     if (calls++ != 0)
 4955         fatal(_("shadow_funcs() called twice!"));
 4956 
 4957     funcs = function_list(true);
 4958     (void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow);
 4959     efree(funcs);
 4960 
 4961     /* End with fatal if the user requested it.  */
 4962     if (shadow && lintfunc == r_fatal)
 4963         lintwarn(_("there were shadowed variables."));
 4964 }
 4965 
 4966 
 4967 /* mk_function --- finalize function definition node; remove parameters
 4968  *  out of the symbol table.
 4969  */
 4970 
 4971 static INSTRUCTION *
 4972 mk_function(INSTRUCTION *fi, INSTRUCTION *def)
 4973 {
 4974     NODE *thisfunc;
 4975 
 4976     thisfunc = fi->func_body;
 4977     assert(thisfunc != NULL);
 4978 
 4979     /* add any pre-function comment to start of action for profile.c  */
 4980 
 4981     if (interblock_comment != NULL) {
 4982         interblock_comment->source_line = 0;
 4983         merge_comments(interblock_comment, fi->comment);
 4984         fi->comment = interblock_comment;
 4985         interblock_comment = NULL;
 4986     }
 4987 
 4988     /*
 4989      * Add an implicit return at end;
 4990      * also used by 'return' command in debugger
 4991      */
 4992 
 4993     (void) list_append(def, instruction(Op_push_i));
 4994     def->lasti->memory = dupnode(Nnull_string);
 4995     (void) list_append(def, instruction(Op_K_return));
 4996 
 4997     if (trailing_comment != NULL) {
 4998         (void) list_append(def, trailing_comment);
 4999         trailing_comment = NULL;
 5000     }
 5001 
 5002     if (do_pretty_print) {
 5003         fi[3].nexti = namespace_chain;
 5004         namespace_chain = NULL;
 5005         (void) list_prepend(def, instruction(Op_exec_count));
 5006     }
 5007 
 5008     /* fi->opcode = Op_func */
 5009     (fi + 1)->firsti = def->nexti;
 5010     (fi + 1)->lasti = def->lasti;
 5011     (fi + 2)->first_line = fi->source_line;
 5012     (fi + 2)->last_line = lastline;
 5013     fi->nexti = def->nexti;
 5014     bcfree(def);
 5015 
 5016     (void) list_append(rule_list, fi + 1);  /* debugging */
 5017 
 5018     /* update lint table info */
 5019     func_use(thisfunc->vname, FUNC_DEFINE);
 5020 
 5021     /* remove params from symbol table */
 5022     remove_params(thisfunc);
 5023     return fi;
 5024 }
 5025 
 5026 /*
 5027  * install_function:
 5028  * install function name in the symbol table.
 5029  * Extra work, build up and install a list of the parameter names.
 5030  */
 5031 
 5032 static int
 5033 install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
 5034 {
 5035     NODE *r, *f;
 5036     int pcount = 0;
 5037 
 5038     r = lookup(fname);
 5039     if (r != NULL) {
 5040         error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
 5041         return -1;
 5042     }
 5043 
 5044     if (plist != NULL)
 5045         pcount = plist->lasti->param_count + 1;
 5046     f = install_symbol(fname, Node_func);
 5047     if (f->vname != fname) {
 5048         // DON'T free fname, it's done later
 5049         fname = f->vname;
 5050     }
 5051 
 5052     fi->func_body = f;
 5053     f->param_cnt = pcount;
 5054     f->code_ptr = fi;
 5055     f->fparms = NULL;
 5056     if (pcount > 0) {
 5057         char **pnames;
 5058         pnames = check_params(fname, pcount, plist);    /* frees plist */
 5059         f->fparms = make_params(pnames, pcount);
 5060         efree(pnames);
 5061         install_params(f);
 5062     }
 5063     return 0;
 5064 }
 5065 
 5066 
 5067 /* check_params --- build a list of function parameter names after
 5068  *  making sure that the names are valid and there are no duplicates.
 5069  */
 5070 
 5071 static char **
 5072 check_params(char *fname, int pcount, INSTRUCTION *list)
 5073 {
 5074     INSTRUCTION *p, *np;
 5075     int i, j;
 5076     char *name;
 5077     char **pnames;
 5078 
 5079     assert(pcount > 0);
 5080 
 5081     emalloc(pnames, char **, pcount * sizeof(char *), "check_params");
 5082 
 5083     for (i = 0, p = list->nexti; p != NULL; i++, p = np) {
 5084         np = p->nexti;
 5085         name = p->lextok;
 5086         p->lextok = NULL;
 5087 
 5088         if (strcmp(name, fname) == 0) {
 5089             /* check for function foo(foo) { ... }.  bleah. */
 5090             error_ln(p->source_line,
 5091                 _("function `%s': cannot use function name as parameter name"), fname);
 5092         } else if (is_std_var(name)) {
 5093             error_ln(p->source_line,
 5094                 _("function `%s': cannot use special variable `%s' as a function parameter"),
 5095                     fname, name);
 5096         } else if (strchr(name, ':') != NULL)
 5097             error_ln(p->source_line,
 5098                 _("function `%s': parameter `%s' cannot contain a namespace"),
 5099                     fname, name);
 5100 
 5101         /* check for duplicate parameters */
 5102         for (j = 0; j < i; j++) {
 5103             if (strcmp(name, pnames[j]) == 0) {
 5104                 error_ln(p->source_line,
 5105                     _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
 5106                     fname, i + 1, name, j + 1);
 5107             }
 5108         }
 5109 
 5110         pnames[i] = name;
 5111         bcfree(p);
 5112     }
 5113     bcfree(list);
 5114 
 5115     return pnames;
 5116 }
 5117 
 5118 
 5119 #ifdef HASHSIZE
 5120 undef HASHSIZE
 5121 #endif
 5122 #define HASHSIZE 1021
 5123 
 5124 static struct fdesc {
 5125     char *name;
 5126     short used;
 5127     short defined;
 5128     short extension;
 5129     struct fdesc *next;
 5130 } *ftable[HASHSIZE];
 5131 
 5132 /* func_use --- track uses and definitions of functions */
 5133 
 5134 static void
 5135 func_use(const char *name, enum defref how)
 5136 {
 5137     struct fdesc *fp;
 5138     int len;
 5139     int ind;
 5140 
 5141     len = strlen(name);
 5142     ind = hash(name, len, HASHSIZE, NULL);
 5143 
 5144     for (fp = ftable[ind]; fp != NULL; fp = fp->next)
 5145         if (strcmp(fp->name, name) == 0)
 5146             goto update_value;
 5147 
 5148     /* not in the table, fall through to allocate a new one */
 5149 
 5150     ezalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
 5151     emalloc(fp->name, char *, len + 1, "func_use");
 5152     strcpy(fp->name, name);
 5153     fp->next = ftable[ind];
 5154     ftable[ind] = fp;
 5155 
 5156 update_value:
 5157     if (how == FUNC_DEFINE)
 5158         fp->defined++;
 5159     else if (how == FUNC_EXT) {
 5160         fp->defined++;
 5161         fp->extension++;
 5162     } else
 5163         fp->used++;
 5164 }
 5165 
 5166 /* track_ext_func --- add an extension function to the table */
 5167 
 5168 void
 5169 track_ext_func(const char *name)
 5170 {
 5171     func_use(name, FUNC_EXT);
 5172 }
 5173 
 5174 /* check_funcs --- verify functions that are called but not defined */
 5175 
 5176 static void
 5177 check_funcs()
 5178 {
 5179     struct fdesc *fp, *next;
 5180     int i;
 5181 
 5182     if (! in_main_context())
 5183         goto free_mem;
 5184 
 5185     for (i = 0; i < HASHSIZE; i++) {
 5186         for (fp = ftable[i]; fp != NULL; fp = fp->next) {
 5187             if (do_lint && ! fp->extension) {
 5188                 /*
 5189                  * Making this not a lint check and
 5190                  * incrementing * errcount breaks old code.
 5191                  * Sigh.
 5192                  */
 5193                 if (fp->defined == 0)
 5194                     lintwarn(_("function `%s' called but never defined"),
 5195                         fp->name);
 5196 
 5197                 if (fp->used == 0)
 5198                     lintwarn(_("function `%s' defined but never called directly"),
 5199                         fp->name);
 5200             }
 5201         }
 5202     }
 5203 
 5204 free_mem:
 5205     /* now let's free all the memory */
 5206     for (i = 0; i < HASHSIZE; i++) {
 5207         for (fp = ftable[i]; fp != NULL; fp = next) {
 5208             next = fp->next;
 5209             efree(fp->name);
 5210             efree(fp);
 5211         }
 5212         ftable[i] = NULL;
 5213     }
 5214 }
 5215 
 5216 /* param_sanity --- look for parameters that are regexp constants */
 5217 
 5218 static void
 5219 param_sanity(INSTRUCTION *arglist)
 5220 {
 5221     INSTRUCTION *argl, *arg;
 5222     int i = 1;
 5223 
 5224     if (arglist == NULL)
 5225         return;
 5226     for (argl = arglist->nexti; argl; ) {
 5227         arg = argl->lasti;
 5228         if (arg->opcode == Op_match_rec)
 5229             warning_ln(arg->source_line,
 5230                 _("regexp constant for parameter #%d yields boolean value"), i);
 5231         argl = arg->nexti;
 5232         i++;
 5233     }
 5234 }
 5235 
 5236 /* variable --- make sure NAME is in the symbol table */
 5237 
 5238 NODE *
 5239 variable(int location, char *name, NODETYPE type)
 5240 {
 5241     NODE *r;
 5242 
 5243     if ((r = lookup(name)) != NULL) {
 5244         if (r->type == Node_func || r->type == Node_ext_func )
 5245             error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
 5246                 r->vname);
 5247     } else {
 5248         /* not found */
 5249         return install_symbol(name, type);
 5250     }
 5251     efree(name);
 5252     return r;
 5253 }
 5254 
 5255 /* make_regnode --- make a regular expression node */
 5256 
 5257 NODE *
 5258 make_regnode(int type, NODE *exp)
 5259 {
 5260     NODE *n;
 5261 
 5262     assert(type == Node_regex || type == Node_dynregex);
 5263     getnode(n);
 5264     memset(n, 0, sizeof(NODE));
 5265     n->type = type;
 5266     n->re_cnt = 1;
 5267 
 5268     if (type == Node_regex) {
 5269         n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
 5270         if (n->re_reg[0] == NULL) {
 5271             freenode(n);
 5272             return NULL;
 5273         }
 5274         n->re_exp = exp;
 5275         n->re_flags = CONSTANT;
 5276     }
 5277     return n;
 5278 }
 5279 
 5280 
 5281 /* mk_rexp --- make a regular expression constant */
 5282 
 5283 static NODE *
 5284 mk_rexp(INSTRUCTION *list)
 5285 {
 5286     INSTRUCTION *ip;
 5287 
 5288     ip = list->nexti;
 5289     if (ip == list->lasti && ip->opcode == Op_match_rec)
 5290         ip->opcode = Op_push_re;
 5291     else if (ip == list->lasti && ip->opcode == Op_push_re)
 5292         ; /* do nothing --- @/.../ */
 5293     else {
 5294         ip = instruction(Op_push_re);
 5295         ip->memory = make_regnode(Node_dynregex, NULL);
 5296         ip->nexti = list->lasti->nexti;
 5297         list->lasti->nexti = ip;
 5298         list->lasti = ip;
 5299     }
 5300     return ip->memory;
 5301 }
 5302 
 5303 #ifndef NO_LINT
 5304 /* isnoeffect --- when used as a statement, has no side effects */
 5305 
 5306 static int
 5307 isnoeffect(OPCODE type)
 5308 {
 5309     switch (type) {
 5310     case Op_times:
 5311     case Op_times_i:
 5312     case Op_quotient:
 5313     case Op_quotient_i:
 5314     case Op_mod:
 5315     case Op_mod_i:
 5316     case Op_plus:
 5317     case Op_plus_i:
 5318     case Op_minus:
 5319     case Op_minus_i:
 5320     case Op_subscript:
 5321     case Op_concat:
 5322     case Op_exp:
 5323     case Op_exp_i:
 5324     case Op_unary_minus:
 5325     case Op_field_spec:
 5326     case Op_and_final:
 5327     case Op_or_final:
 5328     case Op_equal:
 5329     case Op_notequal:
 5330     case Op_less:
 5331     case Op_greater:
 5332     case Op_leq:
 5333     case Op_geq:
 5334     case Op_match:
 5335     case Op_nomatch:
 5336     case Op_match_rec:
 5337     case Op_not:
 5338     case Op_in_array:
 5339         return true;
 5340     default:
 5341         break;  /* keeps gcc -Wall happy */
 5342     }
 5343 
 5344     return false;
 5345 }
 5346 #endif /* NO_LINT */
 5347 
 5348 
 5349 /* make_assignable --- make this operand an assignable one if posiible */
 5350 
 5351 static INSTRUCTION *
 5352 make_assignable(INSTRUCTION *ip)
 5353 {
 5354     switch (ip->opcode) {
 5355     case Op_push:
 5356         ip->opcode = Op_push_lhs;
 5357         return ip;
 5358     case Op_field_spec:
 5359         ip->opcode = Op_field_spec_lhs;
 5360         return ip;
 5361     case Op_subscript:
 5362         ip->opcode = Op_subscript_lhs;
 5363         return ip;
 5364     default:
 5365         break;  /* keeps gcc -Wall happy */
 5366     }
 5367     return NULL;
 5368 }
 5369 
 5370 /* stopme --- for debugging */
 5371 
 5372 NODE *
 5373 stopme(int nargs ATTRIBUTE_UNUSED)
 5374 {
 5375     return make_number(0.0);
 5376 }
 5377 
 5378 /* dumpintlstr --- write out an initial .po file entry for the string */
 5379 
 5380 static void
 5381 dumpintlstr(const char *str, size_t len)
 5382 {
 5383     char *cp;
 5384 
 5385     /* See the GNU gettext distribution for details on the file format */
 5386 
 5387     if (source != NULL) {
 5388         /* ala the gettext sources, remove leading `./'s */
 5389         for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
 5390             continue;
 5391         printf("#: %s:%d\n", cp, sourceline);
 5392     }
 5393 
 5394     printf("msgid ");
 5395     pp_string_fp(fprintf, stdout, str, len, '"', true);
 5396     putchar('\n');
 5397     printf("msgstr \"\"\n\n");
 5398     fflush(stdout);
 5399 }
 5400 
 5401 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
 5402 
 5403 static void
 5404 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
 5405 {
 5406     char *cp;
 5407 
 5408     /* See the GNU gettext distribution for details on the file format */
 5409 
 5410     if (source != NULL) {
 5411         /* ala the gettext sources, remove leading `./'s */
 5412         for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
 5413             continue;
 5414         printf("#: %s:%d\n", cp, sourceline);
 5415     }
 5416 
 5417     printf("msgid ");
 5418     pp_string_fp(fprintf, stdout, str1, len1, '"', true);
 5419     putchar('\n');
 5420     printf("msgid_plural ");
 5421     pp_string_fp(fprintf, stdout, str2, len2, '"', true);
 5422     putchar('\n');
 5423     printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
 5424     fflush(stdout);
 5425 }
 5426 
 5427 /* mk_binary --- instructions for binary operators */
 5428 
 5429 static INSTRUCTION *
 5430 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
 5431 {
 5432     INSTRUCTION *ip1,*ip2;
 5433     AWKNUM res;
 5434 
 5435     ip2 = s2->nexti;
 5436     if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
 5437     /* do any numeric constant folding */
 5438         ip1 = s1->nexti;
 5439         if (do_optimize
 5440                 && ip1 == s1->lasti && ip1->opcode == Op_push_i
 5441                 && (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
 5442                 && (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
 5443         ) {
 5444             NODE *n1 = ip1->memory, *n2 = ip2->memory;
 5445             res = force_number(n1)->numbr;
 5446             (void) force_number(n2);
 5447             switch (op->opcode) {
 5448             case Op_times:
 5449                 res *= n2->numbr;
 5450                 break;
 5451             case Op_quotient:
 5452                 if (n2->numbr == 0.0) {
 5453                     /* don't fatalize, allow parsing rest of the input */
 5454                     error_ln(op->source_line, _("division by zero attempted"));
 5455                     goto regular;
 5456                 }
 5457 
 5458                 res /= n2->numbr;
 5459                 break;
 5460             case Op_mod:
 5461                 if (n2->numbr == 0.0) {
 5462                     /* don't fatalize, allow parsing rest of the input */
 5463                     error_ln(op->source_line, _("division by zero attempted in `%%'"));
 5464                     goto regular;
 5465                 }
 5466 #ifdef HAVE_FMOD
 5467                 res = fmod(res, n2->numbr);
 5468 #else   /* ! HAVE_FMOD */
 5469                 (void) modf(res / n2->numbr, &res);
 5470                 res = n1->numbr - res * n2->numbr;
 5471 #endif  /* ! HAVE_FMOD */
 5472                 break;
 5473             case Op_plus:
 5474                 res += n2->numbr;
 5475                 break;
 5476             case Op_minus:
 5477                 res -= n2->numbr;
 5478                 break;
 5479             case Op_exp:
 5480                 res = calc_exp(res, n2->numbr);
 5481                 break;
 5482             default:
 5483                 goto regular;
 5484             }
 5485 
 5486             op->opcode = Op_push_i;
 5487             // We don't need to call set_profile_text() here since
 5488             // optimizing is disabled when doing pretty printing.
 5489             op->memory = make_number(res);
 5490             unref(n1);
 5491             unref(n2);
 5492             bcfree(ip1);
 5493             bcfree(ip2);
 5494             bcfree(s1);
 5495             bcfree(s2);
 5496             return list_create(op);
 5497         } else {
 5498         /* do basic arithmetic optimisation */
 5499         /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
 5500             switch (op->opcode) {
 5501             case Op_times:
 5502                 op->opcode = Op_times_i;
 5503                 break;
 5504             case Op_quotient:
 5505                 op->opcode = Op_quotient_i;
 5506                 break;
 5507             case Op_mod:
 5508                 op->opcode = Op_mod_i;
 5509                 break;
 5510             case Op_plus:
 5511                 op->opcode = Op_plus_i;
 5512                 break;
 5513             case Op_minus:
 5514                 op->opcode = Op_minus_i;
 5515                 break;
 5516             case Op_exp:
 5517                 op->opcode = Op_exp_i;
 5518                 break;
 5519             default:
 5520                 goto regular;
 5521             }
 5522 
 5523             op->memory = ip2->memory;
 5524             bcfree(ip2);
 5525             bcfree(s2); /* Op_list */
 5526             return list_append(s1, op);
 5527         }
 5528     }
 5529 
 5530 regular:
 5531     /* append lists s1, s2 and add `op' bytecode */
 5532     (void) list_merge(s1, s2);
 5533     return list_append(s1, op);
 5534 }
 5535 
 5536 /* mk_boolean --- instructions for boolean and, or */
 5537 
 5538 static INSTRUCTION *
 5539 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
 5540 {
 5541     INSTRUCTION *tp;
 5542     OPCODE opc, final_opc;
 5543 
 5544     opc = op->opcode;       /* Op_and or Op_or */
 5545     final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
 5546 
 5547     add_lint(right, LINT_assign_in_cond);
 5548 
 5549     tp = left->lasti;
 5550 
 5551     if (tp->opcode != final_opc) {  /* x || y */
 5552         list_append(right, instruction(final_opc));
 5553         add_lint(left, LINT_assign_in_cond);
 5554         (void) list_append(left, op);
 5555         left->lasti->target_jmp = right->lasti;
 5556 
 5557         /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
 5558          *     target_stmt only used in the parser (see below).
 5559          */
 5560 
 5561         left->lasti->target_stmt = left->lasti;
 5562         right->lasti->target_stmt = left->lasti;
 5563     } else {        /* optimization for x || y || z || ... */
 5564         INSTRUCTION *ip;
 5565 
 5566         op->opcode = final_opc;
 5567         (void) list_append(right, op);
 5568         op->target_stmt = tp;
 5569         tp->opcode = opc;
 5570         tp->target_jmp = op;
 5571 
 5572         /* update jump targets */
 5573         for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
 5574             assert(ip->opcode == opc);
 5575             assert(ip->target_jmp == tp);
 5576             /* if (ip->opcode == opc &&  ip->target_jmp == tp) */
 5577             ip->target_jmp = op;
 5578             if (ip->target_stmt == ip)
 5579                 break;
 5580         }
 5581     }
 5582 
 5583     return list_merge(left, right);
 5584 }
 5585 
 5586 /* mk_condition --- if-else and conditional */
 5587 
 5588 static INSTRUCTION *
 5589 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
 5590         INSTRUCTION *elsep, INSTRUCTION *false_branch)
 5591 {
 5592     /*
 5593      *    ----------------
 5594      *       cond
 5595      *    ----------------
 5596      * t: [Op_jmp_false f ]
 5597      *    ----------------
 5598      *       true_branch
 5599      *
 5600      *    ----------------
 5601      *    [Op_jmp y]
 5602      *    ----------------
 5603      * f:
 5604      *      false_branch
 5605      *    ----------------
 5606      * y: [Op_no_op]
 5607      *    ----------------
 5608      */
 5609 
 5610     INSTRUCTION *ip;
 5611     bool setup_else_part = true;
 5612 
 5613     if (false_branch == NULL) {
 5614         false_branch = list_create(instruction(Op_no_op));
 5615         if (elsep == NULL) {        /* else { } */
 5616             setup_else_part = false;
 5617         }
 5618     } else {
 5619         /* assert(elsep != NULL); */
 5620 
 5621         /* avoid a series of no_op's: if .. else if .. else if .. */
 5622         if (false_branch->lasti->opcode != Op_no_op)
 5623             (void) list_append(false_branch, instruction(Op_no_op));
 5624     }
 5625 
 5626     if (setup_else_part) {
 5627         if (do_pretty_print) {
 5628             (void) list_prepend(false_branch, elsep);
 5629             false_branch->nexti->branch_end = false_branch->lasti;
 5630             (void) list_prepend(false_branch, instruction(Op_exec_count));
 5631         } else
 5632             bcfree(elsep);
 5633     }
 5634 
 5635     (void) list_prepend(false_branch, instruction(Op_jmp));
 5636     false_branch->nexti->target_jmp = false_branch->lasti;
 5637 
 5638     add_lint(cond, LINT_assign_in_cond);
 5639     ip = list_append(cond, instruction(Op_jmp_false));
 5640     ip->lasti->target_jmp = false_branch->nexti->nexti;
 5641 
 5642     if (do_pretty_print) {
 5643         (void) list_prepend(ip, ifp);
 5644         (void) list_append(ip, instruction(Op_exec_count));
 5645         ip->nexti->branch_if = ip->lasti;
 5646         ip->nexti->branch_else = false_branch->nexti;
 5647     } else
 5648         bcfree(ifp);
 5649 
 5650     if (true_branch != NULL)
 5651         list_merge(ip, true_branch);
 5652     return list_merge(ip, false_branch);
 5653 }
 5654 
 5655 enum defline { FIRST_LINE, LAST_LINE };
 5656 
 5657 /* find_line -- find the first(last) line in a list of (pattern) instructions */
 5658 
 5659 static int
 5660 find_line(INSTRUCTION *pattern, enum defline what)
 5661 {
 5662     INSTRUCTION *ip;
 5663     int lineno = 0;
 5664 
 5665     for (ip = pattern->nexti; ip; ip = ip->nexti) {
 5666         if (what == LAST_LINE) {
 5667             if (ip->source_line > lineno)
 5668                 lineno = ip->source_line;
 5669         } else {    /* FIRST_LINE */
 5670             if (ip->source_line > 0
 5671                     && (lineno == 0 || ip->source_line < lineno))
 5672                 lineno = ip->source_line;
 5673         }
 5674         if (ip == pattern->lasti)
 5675             break;
 5676     }
 5677     assert(lineno > 0);
 5678     return lineno;
 5679 }
 5680 
 5681 /* append_rule --- pattern-action instructions */
 5682 
 5683 static INSTRUCTION *
 5684 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
 5685 {
 5686     /*
 5687      *    ----------------
 5688      *       pattern
 5689      *    ----------------
 5690      *    [Op_jmp_false f ]
 5691      *    ----------------
 5692      *       action
 5693      *    ----------------
 5694      * f: [Op_no_op       ]
 5695      *    ----------------
 5696      */
 5697 
 5698     INSTRUCTION *rp;
 5699     INSTRUCTION *tp;
 5700     INSTRUCTION *ip;
 5701 
 5702     if (rule != Rule) {
 5703         rp = pattern;
 5704         if (do_pretty_print) {
 5705             rp[3].nexti = namespace_chain;
 5706             namespace_chain = NULL;
 5707             (void) list_append(action, instruction(Op_no_op));
 5708         }
 5709         (rp + 1)->firsti = action->nexti;
 5710         (rp + 1)->lasti = action->lasti;
 5711         (rp + 2)->first_line = pattern->source_line;
 5712         (rp + 2)->last_line = lastline;
 5713         ip = list_prepend(action, rp);
 5714         if (interblock_comment != NULL) {
 5715             ip = list_prepend(ip, interblock_comment);
 5716             interblock_comment = NULL;
 5717         }
 5718     } else {
 5719         rp = bcalloc(Op_rule, 4, 0);
 5720         rp->in_rule = Rule;
 5721         rp->source_file = source;
 5722         tp = instruction(Op_no_op);
 5723 
 5724         if (do_pretty_print) {
 5725             rp[3].nexti = namespace_chain;
 5726             namespace_chain = NULL;
 5727         }
 5728 
 5729         if (pattern == NULL) {
 5730             /* assert(action != NULL); */
 5731             if (do_pretty_print)
 5732                 (void) list_prepend(action, instruction(Op_exec_count));
 5733             (rp + 1)->firsti = action->nexti;
 5734             (rp + 1)->lasti = tp;
 5735             (rp + 2)->first_line = firstline;
 5736             (rp + 2)->last_line = lastline;
 5737             rp->source_line = firstline;
 5738             ip = list_prepend(list_append(action, tp), rp);
 5739         } else {
 5740             (void) list_append(pattern, instruction(Op_jmp_false));
 5741             pattern->lasti->target_jmp = tp;
 5742             (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
 5743             rp->source_line = (rp + 2)->first_line;
 5744             if (action == NULL) {
 5745                 (rp + 2)->last_line = find_line(pattern, LAST_LINE);
 5746                 action = list_create(instruction(Op_K_print_rec));
 5747                 if (do_pretty_print)
 5748                     action = list_prepend(action, instruction(Op_exec_count));
 5749             } else
 5750                 (rp + 2)->last_line = lastline;
 5751 
 5752             if (interblock_comment != NULL) {   // was after previous action
 5753                 pattern = list_prepend(pattern, interblock_comment);
 5754                 interblock_comment = NULL;
 5755             }
 5756 
 5757             if (do_pretty_print) {
 5758                 pattern = list_prepend(pattern, instruction(Op_exec_count));
 5759                 action = list_prepend(action, instruction(Op_exec_count));
 5760             }
 5761 
 5762             (rp + 1)->firsti = action->nexti;
 5763             (rp + 1)->lasti = tp;
 5764             ip = list_append(
 5765                     list_merge(list_prepend(pattern, rp),
 5766                         action),
 5767                     tp);
 5768         }
 5769     }
 5770 
 5771     list_append(rule_list, rp + 1);
 5772 
 5773     if (rule_block[rule] == NULL)
 5774         rule_block[rule] = ip;
 5775     else
 5776         (void) list_merge(rule_block[rule], ip);
 5777 
 5778     return rule_block[rule];
 5779 }
 5780 
 5781 /* mk_assignment --- assignment bytecodes */
 5782 
 5783 static INSTRUCTION *
 5784 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
 5785 {
 5786     INSTRUCTION *tp;
 5787     INSTRUCTION *ip;
 5788 
 5789     tp = lhs->lasti;
 5790     switch (tp->opcode) {
 5791     case Op_field_spec:
 5792         tp->opcode = Op_field_spec_lhs;
 5793         break;
 5794     case Op_subscript:
 5795         tp->opcode = Op_subscript_lhs;
 5796         break;
 5797     case Op_push:
 5798     case Op_push_array:
 5799         tp->opcode = Op_push_lhs;
 5800         break;
 5801     case Op_field_assign:
 5802         yyerror(_("cannot assign a value to the result of a field post-increment expression"));
 5803         break;
 5804     default:
 5805         yyerror(_("invalid target of assignment (opcode %s)"),
 5806                 opcode2str(tp->opcode));
 5807         break;
 5808     }
 5809 
 5810     tp->do_reference = (op->opcode != Op_assign);   /* check for uninitialized reference */
 5811 
 5812     if (rhs != NULL)
 5813         ip = list_merge(rhs, lhs);
 5814     else
 5815         ip = lhs;
 5816 
 5817     (void) list_append(ip, op);
 5818 
 5819     if (tp->opcode == Op_push_lhs
 5820             && tp->memory->type == Node_var
 5821             && tp->memory->var_assign
 5822     ) {
 5823         tp->do_reference = false; /* no uninitialized reference checking
 5824                                    * for a special variable.
 5825                                    */
 5826         (void) list_append(ip, instruction(Op_var_assign));
 5827         ip->lasti->assign_var = tp->memory->var_assign;
 5828     } else if (tp->opcode == Op_field_spec_lhs) {
 5829         (void) list_append(ip, instruction(Op_field_assign));
 5830         ip->lasti->field_assign = (Func_ptr) 0;
 5831         tp->target_assign = ip->lasti;
 5832     } else if (tp->opcode == Op_subscript_lhs) {
 5833         (void) list_append(ip, instruction(Op_subscript_assign));
 5834     }
 5835 
 5836     return ip;
 5837 }
 5838 
 5839 /* optimize_assignment --- peephole optimization for assignment */
 5840 
 5841 static INSTRUCTION *
 5842 optimize_assignment(INSTRUCTION *exp)
 5843 {
 5844     INSTRUCTION *i1, *i2, *i3;
 5845 
 5846     /*
 5847      * Optimize assignment statements array[subs] = x; var = x; $n = x;
 5848      * string concatenation of the form s = s t.
 5849      *
 5850      * 1) Array element assignment array[subs] = x:
 5851      *   Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
 5852      *   with single instruction Op_store_sub.
 5853      *   Limitation: 1 dimension and sub is simple var/value.
 5854      *
 5855      * 2) Simple variable assignment var = x:
 5856      *   Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
 5857      *
 5858      * 3) Field assignment $n = x:
 5859      *   Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
 5860      *   with Op_store_field.
 5861      *
 5862      * 4) Optimization for string concatenation:
 5863      *   For cases like x = x y, uses realloc to include y in x;
 5864      *   also eliminates instructions Op_push_lhs and Op_pop.
 5865      */
 5866 
 5867     /*
 5868      * N.B.: do not append Op_pop instruction to the returned
 5869      * instruction list if optimized. None of these
 5870      * optimized instructions pushes the r-value of assignment
 5871      * onto the runtime stack.
 5872      */