"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "field.c" between
gawk-5.0.1.tar.xz and gawk-5.1.0.tar.xz

About: GNU awk - pattern scanning and processing language.

field.c  (gawk-5.0.1.tar.xz):field.c  (gawk-5.1.0.tar.xz)
/* /*
* field.c - routines for dealing with fields and record parsing * field.c - routines for dealing with fields and record parsing
*/ */
/* /*
* Copyright (C) 1986, 1988, 1989, 1991-2018 the Free Software Foundation, Inc. * Copyright (C) 1986, 1988, 1989, 1991-2020 the Free Software Foundation, Inc.
* *
* This file is part of GAWK, the GNU implementation of the * This file is part of GAWK, the GNU implementation of the
* AWK Programming Language. * AWK Programming Language.
* *
* GAWK is free software; you can redistribute it and/or modify * GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or * the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* GAWK is distributed in the hope that it will be useful, * GAWK is distributed in the hope that it will be useful,
skipping to change at line 77 skipping to change at line 77
static void set_field(long num, char *str, long len, NODE *dummy); static void set_field(long num, char *str, long len, NODE *dummy);
static void purge_record(void); static void purge_record(void);
static char *parse_extent; /* marks where to restart parse of record */ static char *parse_extent; /* marks where to restart parse of record */
static long parse_high_water = 0; /* field number that we have parsed so far */ static long parse_high_water = 0; /* field number that we have parsed so far */
static long nf_high_water = 0; /* size of fields_arr */ static long nf_high_water = 0; /* size of fields_arr */
static bool resave_fs; static bool resave_fs;
static NODE *save_FS; /* save current value of FS when line is read, static NODE *save_FS; /* save current value of FS when line is read,
* to be used in deferred parsing * to be used in deferred parsing
*/ */
static NODE *save_FPAT; /* save current value of FPAT when line is read,
* to be used in deferred parsing
*/
static awk_fieldwidth_info_t *FIELDWIDTHS = NULL; static awk_fieldwidth_info_t *FIELDWIDTHS = NULL;
NODE **fields_arr; /* array of pointers to the field nodes */ NODE **fields_arr; /* array of pointers to the field nodes */
bool field0_valid; /* $(>0) has not been changed yet */ bool field0_valid; /* $(>0) has not been changed yet */
int default_FS; /* true when FS == " " */ int default_FS; /* true when FS == " " */
Regexp *FS_re_yes_case = NULL; Regexp *FS_re_yes_case = NULL;
Regexp *FS_re_no_case = NULL; Regexp *FS_re_no_case = NULL;
Regexp *FS_regexp = NULL; Regexp *FS_regexp = NULL;
Regexp *FPAT_re_yes_case = NULL; Regexp *FPAT_re_yes_case = NULL;
Regexp *FPAT_re_no_case = NULL; Regexp *FPAT_re_no_case = NULL;
Regexp *FPAT_regexp = NULL; Regexp *FPAT_regexp = NULL;
NODE *Null_field = NULL; NODE *Null_field = NULL;
/* init_fields --- set up the fields array to start with */ /* init_fields --- set up the fields array to start with */
void void
init_fields() init_fields()
{ {
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
getnode(fields_arr[0]); fields_arr[0] = make_string("", 0);
*fields_arr[0] = *Nnull_string;
fields_arr[0]->flags |= NULL_FIELD; fields_arr[0]->flags |= NULL_FIELD;
parse_extent = fields_arr[0]->stptr; parse_extent = fields_arr[0]->stptr;
save_FS = dupnode(FS_node->var_value); save_FS = dupnode(FS_node->var_value);
getnode(Null_field); Null_field = make_string("", 0);
*Null_field = *Nnull_string;
Null_field->valref = 1;
Null_field->flags = (STRCUR|STRING|NULL_FIELD); /* do not set MALLOC */ Null_field->flags = (STRCUR|STRING|NULL_FIELD); /* do not set MALLOC */
field0_valid = true; field0_valid = true;
} }
/* grow_fields --- acquire new fields as needed */ /* grow_fields --- acquire new fields as needed */
static void static void
grow_fields_arr(long num) grow_fields_arr(long num)
{ {
skipping to change at line 291 skipping to change at line 291
if (cnt >= databuf_size) { if (cnt >= databuf_size) {
do { do {
if (databuf_size > MAX_SIZE/2) if (databuf_size > MAX_SIZE/2)
fatal(_("input record too large")); fatal(_("input record too large"));
databuf_size *= 2; databuf_size *= 2;
} while (cnt >= databuf_size); } while (cnt >= databuf_size);
erealloc(databuf, char *, databuf_size, "set_record"); erealloc(databuf, char *, databuf_size, "set_record");
memset(databuf, '\0', databuf_size); memset(databuf, '\0', databuf_size);
} }
/* copy the data */ /* copy the data */
memcpy(databuf, buf, cnt); if (cnt != 0) {
memcpy(databuf, buf, cnt);
}
/* /*
* Add terminating '\0' so that C library routines * Add terminating '\0' so that C library routines
* will know when to stop. * will know when to stop.
*/ */
databuf[cnt] = '\0'; databuf[cnt] = '\0';
/* manage field 0: */ /* manage field 0: */
assert((fields_arr[0]->flags & MALLOC) == 0 assert((fields_arr[0]->flags & MALLOC) == 0
? fields_arr[0]->valref == 1 ? fields_arr[0]->valref == 1
skipping to change at line 467 skipping to change at line 469
memset(&mbs, 0, sizeof(mbstate_t)); memset(&mbs, 0, sizeof(mbstate_t));
if (in_middle) if (in_middle)
regex_flags |= RE_NO_BOL; regex_flags |= RE_NO_BOL;
if (up_to == UNLIMITED) if (up_to == UNLIMITED)
nf = 0; nf = 0;
if (len == 0) if (len == 0)
return nf; return nf;
if (RS_is_null && default_FS) { bool default_field_splitting = (RS_is_null && default_FS);
if (default_field_splitting) {
sep = scan; sep = scan;
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
scan++; scan++;
if (sep_arr != NULL && sep < scan) if (sep_arr != NULL && sep < scan)
set_element(nf, sep, (long)(scan - sep), sep_arr); set_element(nf, sep, (long)(scan - sep), sep_arr);
} }
if (rp == NULL) /* use FS */ if (rp == NULL) /* use FS */
rp = FS_regexp; rp = FS_regexp;
skipping to change at line 508 skipping to change at line 512
} }
continue; continue;
} }
(*set)(++nf, field, (*set)(++nf, field,
(long)(scan + RESTART(rp, scan) - field), n); (long)(scan + RESTART(rp, scan) - field), n);
if (sep_arr != NULL) if (sep_arr != NULL)
set_element(nf, scan + RESTART(rp, scan), set_element(nf, scan + RESTART(rp, scan),
(long) (REEND(rp, scan) - RESTART(rp, scan)), sep_arr); (long) (REEND(rp, scan) - RESTART(rp, scan)), sep_arr);
scan += REEND(rp, scan); scan += REEND(rp, scan);
field = scan; field = scan;
if (scan == end) /* FS at end of record */ if (scan == end && ! default_field_splitting) /* FS at end of record */
(*set)(++nf, field, 0L, n); (*set)(++nf, field, 0L, n);
} }
if (nf != up_to && scan < end) { if (nf != up_to && scan < end) {
(*set)(++nf, scan, (long)(end - scan), n); (*set)(++nf, scan, (long)(end - scan), n);
scan = end; scan = end;
} }
*buf = scan; *buf = scan;
return nf; return nf;
} }
skipping to change at line 844 skipping to change at line 848
/* get_field --- return a particular $n */ /* get_field --- return a particular $n */
/* assign is not NULL if this field is on the LHS of an assign */ /* assign is not NULL if this field is on the LHS of an assign */
NODE ** NODE **
get_field(long requested, Func_ptr *assign) get_field(long requested, Func_ptr *assign)
{ {
bool in_middle = false; bool in_middle = false;
static bool warned = false; static bool warned = false;
extern int currule; extern int currule;
NODE *saved_fs;
Regexp *fs_regexp;
if (do_lint && currule == END && ! warned) { if (do_lint && currule == END && ! warned) {
warned = true; warned = true;
lintwarn(_("accessing fields from an END rule may not be portable")); lintwarn(_("accessing fields from an END rule may not be portable"));
} }
/* /*
* if requesting whole line but some other field has been altered, * if requesting whole line but some other field has been altered,
* then the whole line must be rebuilt * then the whole line must be rebuilt
*/ */
if (requested == 0) { if (requested == 0) {
if (! field0_valid) { if (! field0_valid) {
/* first, parse remainder of input record */ /* first, parse remainder of input record */
if (NF == -1) { if (NF == -1) {
in_middle = (parse_high_water != 0); in_middle = (parse_high_water != 0);
if (current_field_sep() == Using_FPAT) {
saved_fs = save_FPAT;
fs_regexp = FPAT_regexp;
} else {
saved_fs = save_FS;
fs_regexp = FS_regexp;
}
NF = (*parse_field)(UNLIMITED - 1, &parse_extent, NF = (*parse_field)(UNLIMITED - 1, &parse_extent,
fields_arr[0]->stlen - fields_arr[0]->stlen -
(parse_extent - fields_arr[0]->stptr), (parse_extent - fields_arr[0]->stptr),
save_FS, FS_regexp, set_field, saved_fs, fs_regexp, set_field,
(NODE *) NULL, (NODE *) NULL,
(NODE *) NULL, (NODE *) NULL,
in_middle); in_middle);
parse_high_water = NF; parse_high_water = NF;
} }
rebuild_record(); rebuild_record();
} }
if (assign != NULL) if (assign != NULL)
*assign = reset_record; *assign = reset_record;
return &fields_arr[0]; return &fields_arr[0];
skipping to change at line 919 skipping to change at line 932
/* /*
* if we reached the end of the record, set NF to the number of * if we reached the end of the record, set NF to the number of
* fields so far. Note that requested might actually refer to * fields so far. Note that requested might actually refer to
* a field that is beyond the end of the record, but we won't * a field that is beyond the end of the record, but we won't
* set NF to that value at this point, since this is only a * set NF to that value at this point, since this is only a
* reference to the field and NF only gets set if the field * reference to the field and NF only gets set if the field
* is assigned to -- this case is handled below * is assigned to -- this case is handled below
*/ */
if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
NF = parse_high_water; NF = parse_high_water;
else if (parse_field == fpat_parse_field) {
/* FPAT parsing is weird, isolate the special cases */
char *rec_start = fields_arr[0]->stptr;
char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen;
if ( parse_extent > rec_end
|| (parse_extent > rec_start && parse_extent < rec_end && requested == UNLIMITED-1))
NF = parse_high_water;
else if (parse_extent == rec_start) /* could be no match for FPAT */
NF = 0;
}
if (requested == UNLIMITED - 1) /* UNLIMITED-1 means set NF */ if (requested == UNLIMITED - 1) /* UNLIMITED-1 means set NF */
requested = parse_high_water; requested = parse_high_water;
} }
if (parse_high_water < requested) { /* requested beyond end of record */ if (parse_high_water < requested) { /* requested beyond end of record */
if (assign != NULL) { /* expand record */ if (assign != NULL) { /* expand record */
if (requested > nf_high_water) if (requested > nf_high_water)
grow_fields_arr(requested); grow_fields_arr(requested);
NF = requested; NF = requested;
parse_high_water = requested; parse_high_water = requested;
skipping to change at line 1313 skipping to change at line 1315
if (! do_traditional && fs->stlen == 0) { if (! do_traditional && fs->stlen == 0) {
static bool warned = false; static bool warned = false;
set_parser(null_parse_field); set_parser(null_parse_field);
if (do_lint_extensions && ! warned) { if (do_lint_extensions && ! warned) {
warned = true; warned = true;
lintwarn(_("null string for `FS' is a gawk extension")); lintwarn(_("null string for `FS' is a gawk extension"));
} }
} else if (fs->stlen > 1) { } else if (fs->stlen > 1 || (fs->flags & REGEX) != 0) {
if (do_lint_old) if (do_lint_old)
warning(_("old awk does not support regexps as value of `FS'")); lintwarn(_("old awk does not support regexps as value of `FS'"));
set_parser(re_parse_field); set_parser(re_parse_field);
} else if (RS_is_null) { } else if (RS_is_null) {
/* we know that fs->stlen <= 1 */ /* we know that fs->stlen <= 1 */
set_parser(sc_parse_field); set_parser(sc_parse_field);
if (fs->stlen == 1) { if (fs->stlen == 1) {
if (fs->stptr[0] == ' ') { if (fs->stptr[0] == ' ') {
default_FS = true; default_FS = true;
strcpy(buf, "[ \t\n]+"); strcpy(buf, "[ \t\n]+");
} else if (fs->stptr[0] == '\\') { } else if (fs->stptr[0] == '\\') {
/* yet another special case */ /* yet another special case */
skipping to change at line 1436 skipping to change at line 1438
tmp = make_string(subscript, strlen(subscript)); tmp = make_string(subscript, strlen(subscript));
assoc_set(PROCINFO_node, tmp, make_number(val)); assoc_set(PROCINFO_node, tmp, make_number(val));
} }
/* set_FPAT --- handle an assignment to FPAT */ /* set_FPAT --- handle an assignment to FPAT */
void void
set_FPAT() set_FPAT()
{ {
static bool warned = false; static bool warned = false;
static NODE *save_fpat = NULL;
bool remake_re = true; bool remake_re = true;
NODE *fpat; NODE *fpat;
if (do_lint_extensions && ! warned) { if (do_lint_extensions && ! warned) {
warned = true; warned = true;
lintwarn(_("`FPAT' is a gawk extension")); lintwarn(_("`FPAT' is a gawk extension"));
} }
if (do_traditional) /* quick and dirty, does the trick */ if (do_traditional) /* quick and dirty, does the trick */
return; return;
skipping to change at line 1459 skipping to change at line 1460
* semantics, and force $0 to be split totally. * semantics, and force $0 to be split totally.
*/ */
if (fields_arr != NULL) if (fields_arr != NULL)
(void) get_field(UNLIMITED - 1, 0); (void) get_field(UNLIMITED - 1, 0);
/* It's possible that only IGNORECASE changed, or FPAT = FPAT */ /* It's possible that only IGNORECASE changed, or FPAT = FPAT */
/* /*
* This comparison can't use cmp_nodes(), which pays attention * This comparison can't use cmp_nodes(), which pays attention
* to IGNORECASE, and that's not what we want. * to IGNORECASE, and that's not what we want.
*/ */
if (save_fpat if (save_FPAT
&& FPAT_node->var_value->stlen == save_fpat->stlen && FPAT_node->var_value->stlen == save_FPAT->stlen
&& memcmp(FPAT_node->var_value->stptr, save_fpat->stptr, save_fpat->stlen) == 0) { && memcmp(FPAT_node->var_value->stptr, save_FPAT->stptr, save_FPAT->stlen) == 0) {
if (FPAT_regexp != NULL) if (FPAT_regexp != NULL)
FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case); FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
/* FPAT = FPAT */ /* FPAT = FPAT */
if (current_field_sep() == Using_FPAT) { if (current_field_sep() == Using_FPAT) {
return; return;
} else { } else {
remake_re = false; remake_re = false;
goto set_fpat_function; goto set_fpat_function;
} }
} }
unref(save_fpat); unref(save_FPAT);
save_fpat = dupnode(FPAT_node->var_value); save_FPAT = dupnode(FPAT_node->var_value);
refree(FPAT_re_yes_case); refree(FPAT_re_yes_case);
refree(FPAT_re_no_case); refree(FPAT_re_no_case);
FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL; FPAT_re_yes_case = FPAT_re_no_case = FPAT_regexp = NULL;
set_fpat_function: set_fpat_function:
fpat = force_string(FPAT_node->var_value); fpat = force_string(FPAT_node->var_value);
set_parser(fpat_parse_field); set_parser(fpat_parse_field);
if (remake_re) { if (remake_re) {
refree(FPAT_re_yes_case); refree(FPAT_re_yes_case);
skipping to change at line 1568 skipping to change at line 1569
* delete seps * delete seps
* *
* # Loop that consumes the whole record * # Loop that consumes the whole record
* while (parse_start <= length(string)) { # still something to parse * while (parse_start <= length(string)) { # still something to parse
* *
* # first attempt to match the next field * # first attempt to match the next field
* sep_start = parse_start * sep_start = parse_start
* field_found = match(substr(string, parse_start), pattern) * field_found = match(substr(string, parse_start), pattern)
* *
* # check for an invalid null field and retry one character away * # check for an invalid null field and retry one character away
* if (nf > 0 && field_found && RSTART==1 && RLENGTH==0) { * if (nf > 0 && field_found && RSTART == 1 && RLENGTH == 0) {
* parse_start++ * parse_start++
* field_found = match(substr(string, parse_start), pattern) * field_found = match(substr(string, parse_start), pattern)
* } * }
* *
* # store the (sep[n-1],field[n]) pair * # store the (sep[n-1],field[n]) pair
* if (field_found) { * if (field_found) {
* field_start = parse_start + RSTART - 1 * field_start = parse_start + RSTART - 1
* field_length = RLENGTH * field_length = RLENGTH
* seps[nf] = substr(string, sep_start, field_start-sep_start) * seps[nf] = substr(string, sep_start, field_start-sep_start)
* fields[++nf] = substr(string, field_start, field_length) * fields[++nf] = substr(string, field_start, field_length)
 End of changes. 17 change blocks. 
34 lines changed or deleted 33 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)