"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020b/lib/src/dataio.c" (2 Apr 2020, 85281 Bytes) of package /linux/misc/gretl-2020b.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "dataio.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2020a_vs_2020b.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 #include "libgretl.h"
   21 #include "uservar.h"
   22 #include "dbwrite.h"
   23 #include "libset.h"
   24 #include "gretl_func.h"
   25 #include "gretl_xml.h"
   26 #include "gretl_panel.h"
   27 #include "gretl_string_table.h"
   28 #include "csvdata.h"
   29 #include "usermat.h"
   30 
   31 #include <ctype.h>
   32 #include <time.h>
   33 #include <errno.h>
   34 #include <unistd.h>
   35 
   36 #include <glib.h>
   37 
   38 #define MERGE_DEBUG 0
   39 
   40 /**
   41  * SECTION:dataio
   42  * @short_description: data handling (internal)
   43  * @title: Data support
   44  * @include: gretl/libgretl.h
   45  *
   46  * The following data handling functions are basically internal to
   47  * gretl and not in a state where they can be readily
   48  * documented as public APIs.
   49  *
   50  */
   51 
/* Output formats that the data-writing code in this file can
   choose between: format_from_opt_or_name() maps an option flag
   or a filename suffix onto one of these values. */

typedef enum {
    GRETL_FMT_GDT,       /* standard gretl XML data */
    GRETL_FMT_BINARY,    /* native XML + binary data */
    GRETL_FMT_OCTAVE,    /* data in Gnu Octave format */
    GRETL_FMT_CSV,       /* data in Comma Separated Values format */
    GRETL_FMT_R,         /* data in Gnu R format */
    GRETL_FMT_DAT,       /* data in PcGive format */
    GRETL_FMT_DB,        /* gretl native database format */
    GRETL_FMT_JM,        /* JMulti ascii data */
    GRETL_FMT_DTA        /* Stata .dta format */
} GretlDataFormat;
   63 
   64 #define IS_DATE_SEP(c) (c == '.' || c == ':' || c == ',')
   65 
   66 #define PROGRESS_BAR "progress_bar"
   67 
   68 /**
   69  * get_date_x:
   70  * @pd: frequency of data.
   71  * @obs: observation string.
   72  *
   73  * Returns: the floating-point representation of @obs.
   74  */
   75 
   76 double get_date_x (int pd, const char *obs)
   77 {
   78     double x = 1.0;
   79 
   80     if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) && strlen(obs) > 4) {
   81     /* calendar data */
   82     guint32 ed = get_epoch_day(obs);
   83 
   84     if (ed > 0) {
   85         x = ed;
   86     }
   87     } else {
   88     x = obs_str_to_double(obs);
   89     }
   90 
   91     return x;
   92 }
   93 
   94 static int real_check_varname (const char *vname,
   95                    int is_series)
   96 {
   97     int testchar = 'a';
   98     int firstbad = 0;
   99     int err = 0;
  100 
  101     gretl_error_clear();
  102 
  103     if (vname == NULL || *vname == '\0') {
  104     gretl_errmsg_set("Expected an identifier");
  105     return E_PARSE;
  106     }
  107 
  108     if (strlen(vname) >= VNAMELEN) {
  109     gretl_errmsg_set(_("Varname exceeds the maximum of 31 characters"));
  110     err = E_DATA;
  111     } else if (strcmp(vname, "return") && gretl_reserved_word(vname)) {
  112     err = E_DATA;
  113     } else if (!(isalpha((unsigned char) *vname))) {
  114     firstbad = 1;
  115     testchar = *vname;
  116         err = E_DATA;
  117     } else if (is_series && (function_lookup(vname) ||
  118                  get_user_function_by_name(vname) ||
  119                  is_function_alias(vname))) {
  120     gretl_warnmsg_sprintf(_("'%s' shadows a function of the same name"),
  121                   vname);
  122     } else {
  123     const char *p = vname;
  124 
  125     while (*p && testchar == 'a') {
  126         if (!(isalpha((unsigned char) *p))
  127         && !(isdigit((unsigned char) *p))
  128         && *p != '_') {
  129         testchar = *p;
  130         err = E_DATA;
  131         }
  132         p++;
  133     }
  134     }
  135 
  136     if (err && !is_series && strlen(vname) == 2 && is_greek_letter(vname)) {
  137     return 0;
  138     }
  139 
  140     if (testchar != 'a') {
  141     if (isprint((unsigned char) testchar)) {
  142         if (firstbad) {
  143         gretl_errmsg_sprintf(_("First char of varname '%s' is bad\n"
  144                        "(first must be alphabetical)"),
  145                      vname);
  146         } else {
  147         gretl_errmsg_sprintf(_("Varname '%s' contains illegal character '%c'\n"
  148                        "Use only letters, digits and underscore"),
  149                      vname, (unsigned char) testchar);
  150         }
  151     } else {
  152         if (firstbad) {
  153         gretl_errmsg_sprintf(_("First char of varname (0x%x) is bad\n"
  154                        "(first must be alphabetical)"),
  155                      (unsigned) testchar);
  156         } else {
  157         gretl_errmsg_sprintf(_("Varname contains illegal character 0x%x\n"
  158                        "Use only letters, digits and underscore"),
  159                      (unsigned) testchar);
  160         }
  161     }
  162     }
  163 
  164     return err;
  165 }
  166 
  167 /**
  168  * check_varname:
  169  * @varname: putative name for variable (or object).
  170  *
  171  * Check a variable/object name for legality: the name
  172  * must start with a letter, and be composed of letters,
  173  * numbers or the underscore character, and nothing else.
  174  *
  175  * Returns: 0 if name is OK, non-zero if not.
  176  */
  177 
  178 int check_varname (const char *varname)
  179 {
  180     return real_check_varname(varname, 1);
  181 }
  182 
  183 int check_identifier (const char *varname)
  184 {
  185     /* FIXME series case? */
  186     return real_check_varname(varname, 0);
  187 }
  188 
  189 static int bad_date_string (const char *s)
  190 {
  191     int err = 0;
  192 
  193     gretl_error_clear();
  194 
  195     while (*s && !err) {
  196     if (!isdigit((unsigned char) *s) && !IS_DATE_SEP(*s)) {
  197         if (isprint((unsigned char) *s)) {
  198         gretl_errmsg_sprintf(_("Bad character '%c' in date string"), *s);
  199         } else {
  200         gretl_errmsg_sprintf(_("Bad character %d in date string"), *s);
  201         }
  202         err = 1;
  203     }
  204     s++;
  205     }
  206 
  207     return err;
  208 }
  209 
  210 static void maybe_unquote_label (char *targ, const char *src)
  211 {
  212     if (*src == '"' || *src == '\'') {
  213     int n;
  214 
  215     strcpy(targ, src + 1);
  216     n = strlen(targ);
  217     if (n > 0 && (targ[n-1] == '"' || targ[n-1] == '\'')) {
  218         targ[n-1] = '\0';
  219     }
  220     } else {
  221     strcpy(targ, src);
  222     }
  223 }
  224 
  225 static int get_dot_pos (const char *s)
  226 {
  227     int i, pos = 0;
  228 
  229     for (i=0; *s != '\0'; i++, s++) {
  230     if (IS_DATE_SEP(*s)) {
  231         pos = i;
  232         break;
  233     }
  234     }
  235 
  236     return pos;
  237 }
  238 
  239 #define DATES_DEBUG 0
  240 
  241 static int match_obs_marker (const char *s, const DATASET *dset)
  242 {
  243     char test[OBSLEN];
  244     int t;
  245 
  246 #if DATES_DEBUG
  247     fprintf(stderr, "dateton: checking '%s' against marker strings\n", s);
  248 #endif
  249 
  250     maybe_unquote_label(test, s);
  251 
  252     for (t=0; t<dset->n; t++) {
  253     if (!strcmp(test, dset->S[t])) {
  254         /* handled */
  255         return t;
  256     }
  257     }
  258 
  259     return -1;
  260 }
  261 
  262 static int caldate_precheck (const char *s, int *y2, int *slashed)
  263 {
  264     int n = strlen(s);
  265     int ok = 0;
  266 
  267     if (n < 8 || !isdigit(s[0]) || !isdigit(s[1])) {
  268     /* can't be date */
  269     return 0;
  270     }
  271 
  272     if (n == 10) {
  273     if (s[4] == '-' && s[7] == '-') {
  274         ok = 1; /* could be YYYY-MM-DD */
  275     } else if (s[4] == '/' && s[7] == '/') {
  276         *slashed = 1; /* could be YYYY/MM/DD */
  277         ok = 1;
  278     }
  279     } else if (n == 8) {
  280     if (s[2] == '-' && s[5] == '-') {
  281         ok = 1; /* could be YY-MM-DD */
  282     } else if (s[2] == '/' && s[5] == '/') {
  283         *slashed = 1; /* could be YY/MM/DD */
  284         ok = 1;
  285     }
  286     if (ok) *y2 = 1;
  287     }
  288 
  289     return ok;
  290 }
  291 
  292 static int datecmp (const char *s1, int y21, int slash1,
  293             const char *s2, int y22, int slash2)
  294 {
  295     if (y21 && !y22) {
  296     s2 += 2;
  297     } else if (!y21 && y22) {
  298     s1 += 2;
  299     }
  300     if (slash1 && !slash2) {
  301     char revs1[OBSLEN];
  302 
  303     strcpy(revs1, s1);
  304     gretl_charsub(revs1, '/', '-');
  305     return strcmp(revs1, s2);
  306     } else if (slash2 && !slash1) {
  307     char revs2[OBSLEN];
  308 
  309     strcpy(revs2, s2);
  310     gretl_charsub(revs2, '/', '-');
  311     return strcmp(s1, revs2);
  312     } else {
  313     return strcmp(s1, s2);
  314     }
  315 }
  316 
  317 static int
  318 real_dateton (const char *date, const DATASET *dset, int nolimit)
  319 {
  320     int handled = 0;
  321     int t, n = -1;
  322 
  323     /* first check if this is calendar data and if so,
  324        treat accordingly */
  325 
  326     if (calendar_data(dset)) {
  327 #if DATES_DEBUG
  328     fprintf(stderr, "dateton: treating as calendar data\n");
  329 #endif
  330     if (dataset_has_markers(dset)) {
  331         /* "hard-wired" calendar dates as strings */
  332         int tryit, y21 = 0, slash1 = 0;
  333         int y22 = 0, slash2 = 0;
  334 
  335         tryit = caldate_precheck(date, &y21, &slash1);
  336         if (!tryit) {
  337         return -1;
  338         }
  339         tryit = caldate_precheck(dset->S[0], &y22, &slash2);
  340         if (!tryit) {
  341         return -1;
  342         }
  343         for (t=0; t<dset->n; t++) {
  344         if (!datecmp(date, y21, slash1, dset->S[t], y22, slash2)) {
  345             /* handled */
  346             return t;
  347         }
  348         }
  349         /* out of options: abort */
  350         return -1;
  351     } else {
  352         /* automatic calendar dates */
  353         n = calendar_obs_number(date, dset);
  354         handled = 1;
  355     }
  356     } else if (dataset_is_daily(dset) ||
  357            dataset_is_weekly(dset)) {
  358 #if DATES_DEBUG
  359     fprintf(stderr, "dateton: trying undated time series\n");
  360 #endif
  361     t = positive_int_from_string(date);
  362     if (t > 0) {
  363         n = t - 1;
  364         handled = 1;
  365     }
  366     } else if (dataset_is_decennial(dset)) {
  367     t = positive_int_from_string(date);
  368     if (t > 0) {
  369         n = (t - dset->sd0) / 10;
  370         handled = 1;
  371     }
  372     } else if (dataset_has_markers(dset)) {
  373     t = match_obs_marker(date, dset);
  374     if (t >= 0) {
  375         return t;
  376     }
  377     /* else maybe just a straight obs number */
  378     t = positive_int_from_string(date);
  379     if (t > 0) {
  380         n = t - 1;
  381         handled = 1;
  382     }
  383     }
  384 
  385     if (!handled) {
  386     int pos1, pos2;
  387 
  388 #if DATES_DEBUG
  389     fprintf(stderr, "dateton: treating %s as regular numeric obs\n",
  390         date);
  391 #endif
  392     if (bad_date_string(date)) {
  393         return -1;
  394     }
  395 
  396     pos1 = get_dot_pos(date);
  397     pos2 = get_dot_pos(dset->stobs);
  398 
  399     if ((pos1 && !pos2) || (pos2 && !pos1)) {
  400         gretl_errmsg_sprintf(_("'%s': invalid observation index"),
  401                  date);
  402     } else if (!pos1 && !pos2) {
  403         n = atoi(date) - atoi(dset->stobs);
  404     } else if (pos1 > OBSLEN - 2) {
  405         gretl_errmsg_sprintf(_("'%s': invalid observation index"),
  406                  date);
  407     } else {
  408         char tmp[OBSLEN];
  409         int maj, min;
  410 
  411         *tmp = '\0';
  412         strncat(tmp, date, OBSLEN-1);
  413         tmp[pos1] = '\0';
  414         maj = positive_int_from_string(tmp);
  415         min = positive_int_from_string(tmp + pos1 + 1);
  416 
  417         if (maj <= 0 || min <= 0 || min > dset->pd) {
  418         gretl_errmsg_sprintf(_("'%s': invalid observation index"),
  419                      date);
  420         n = -1;
  421         } else {
  422         int maj0, min0;
  423 
  424         *tmp = '\0';
  425         strncat(tmp, dset->stobs, OBSLEN-1);
  426         tmp[pos2] = '\0';
  427         maj0 = atoi(tmp);
  428         min0 = atoi(tmp + pos2 + 1);
  429 
  430         n = dset->pd * (maj - maj0) + (min - min0);
  431         }
  432     }
  433     }
  434 
  435     if (!nolimit && dset->n > 0 && n >= dset->n) {
  436     fprintf(stderr, "n = %d, dset->n = %d: out of bounds\n", n, dset->n);
  437     gretl_errmsg_set(_("Observation number out of bounds"));
  438     n = -1;
  439     }
  440 
  441 #if DATES_DEBUG
  442     fprintf(stderr, "dateton: returning %d\n", n);
  443 #endif
  444 
  445     return n;
  446 }
  447 
  448 /**
  449  * dateton:
  450  * @date: string representation of date for processing.
  451  * @dset: pointer to data information struct.
  452  *
  453  * Determines the observation number corresponding to @date,
  454  * relative to @dset. It is an error if @date represents an
  455  * observation that lies outside of the full data range
  456  * specified in @dset.
  457  *
  458  * Returns: zero-based observation number, or -1 on error.
  459  */
  460 
  461 int dateton (const char *date, const DATASET *dset)
  462 {
  463     return real_dateton(date, dset, 0);
  464 }
  465 
  466 /**
  467  * merge_dateton:
  468  * @date: string representation of date for processing.
  469  * @dset: pointer to data information struct.
  470  *
  471  * Works just as dateton(), except that for this function it
  472  * is not an error if @date represents an observation that
  473  * lies beyond the data range specified in @dset. This is
  474  * inended for use when merging data, or when creating a new
  475  * dataset.
  476  *
  477  * Returns: zero-based observation number, or -1 on error.
  478  */
  479 
  480 int merge_dateton (const char *date, const DATASET *dset)
  481 {
  482     return real_dateton(date, dset, 1);
  483 }
  484 
  485 static char *panel_obs (char *s, int t, const DATASET *dset)
  486 {
  487     int i = t / dset->pd + 1;
  488     int j = (t + 1) % dset->pd;
  489     int d = 1 + floor(log10(dset->pd));
  490 
  491     if (j == 0) {
  492     j = dset->pd;
  493     }
  494 
  495     sprintf(s, "%d:%0*d", i, d, j);
  496 
  497     return s;
  498 }
  499 
  500 /**
  501  * ntodate:
  502  * @datestr: char array to which date is to be printed.
  503  * @t: zero-based observation number.
  504  * @dset: data information struct.
  505  *
  506  * Prints to @datestr (which must be at least #OBSLEN bytes)
  507  * the calendar representation of observation number @t.
  508  *
  509  * Returns: the observation string.
  510  */
  511 
  512 char *ntodate (char *datestr, int t, const DATASET *dset)
  513 {
  514     double x;
  515 
  516 #if 0
  517     fprintf(stderr, "ntodate: t=%d, pd=%d, sd0=%g, incoming stobs='%s'\n",
  518         t, dset->pd, dset->sd0, dset->stobs);
  519 #endif
  520 
  521     if (calendar_data(dset)) {
  522     /* handles both daily and dated weekly data */
  523     if (dataset_has_markers(dset)) {
  524         strcpy(datestr, dset->S[t]);
  525     } else {
  526         calendar_date_string(datestr, t, dset);
  527     }
  528     return datestr;
  529     } else if (dataset_is_daily(dset) ||
  530            dataset_is_weekly(dset)) {
  531     /* undated time series */
  532     x = date_as_double(t, 1, dset->sd0);
  533     sprintf(datestr, "%d", (int) x);
  534     return datestr;
  535     } else if (dataset_is_decennial(dset)) {
  536     x = dset->sd0 + 10 * t;
  537     sprintf(datestr, "%d", (int) x);
  538     return datestr;
  539     } else if (dataset_is_panel(dset)) {
  540     panel_obs(datestr, t, dset);
  541     return datestr;
  542     }
  543 
  544     x = date_as_double(t, dset->pd, dset->sd0);
  545 
  546     if (dset->pd == 1) {
  547         sprintf(datestr, "%d", (int) x);
  548     } else {
  549     int pdp = dset->pd;
  550     short len = 1;
  551     char fmt[10];
  552 
  553     while ((pdp = pdp / 10)) len++;
  554     sprintf(fmt, "%%.%df", len);
  555     sprintf(datestr, fmt, x);
  556     colonize_obs(datestr);
  557     }
  558 
  559     return datestr;
  560 }
  561 
  562 /* print observation date in ISO 8601 extended format */
  563 
  564 char *ntodate_8601 (char *datestr, int t, const DATASET *dset)
  565 {
  566     *datestr = '\0';
  567 
  568     if (calendar_data(dset)) {
  569     if (dataset_has_markers(dset)) {
  570         strcpy(datestr, dset->S[t]);
  571     } else {
  572         calendar_date_string(datestr, t, dset);
  573     }
  574     } else if (dataset_is_decennial(dset)) {
  575     double x = dset->sd0 + 10 * t;
  576     int yr = lrint(x);
  577 
  578     sprintf(datestr, "%d-01-01", yr);
  579     } else {
  580     double x = date_as_double(t, dset->pd, dset->sd0);
  581     int maj = lrint(floor(x));
  582 
  583     if (dset->pd == 1) {
  584         sprintf(datestr, "%d-01-01", maj);
  585     } else if (dset->pd == 12) {
  586         int min = lrint(100 * (x - floor(x)));
  587 
  588         sprintf(datestr, "%d-%02d-01", maj, min);
  589     } else if (dset->pd == 4) {
  590         int min = lrint(10 * (x - floor(x)));
  591         int mo = min==2 ? 4 : min==3? 7 : min==4? 10 : min;
  592 
  593         sprintf(datestr, "%d-%02d-01", maj, mo);
  594     }
  595     }
  596 
  597     return datestr;
  598 }
  599 
  600 #define xround(x) (((x-floor(x))>.5)? ceil(x) : floor(x))
  601 
  602 /**
  603  * get_subperiod:
  604  * @t: zero-based observation number.
  605  * @dset: data information struct.
  606  * @err: location to receive error code, or NULL.
  607  *
  608  * For "seasonal" time series data (in a broad sense),
  609  * determines the sub-period at observation @t. The "sub-period"
  610  * might be a quarter, month, hour or whatever.  The value
  611  * returned is zero-based (e.g. first quarter = 0).
  612  * If the data are not "seasonal", 0 is returned and if
  613  * @err is non-NULL it receives a non-zero error code.
  614  *
  615  * Returns: the sub-period.
  616  */
  617 
  618 int get_subperiod (int t, const DATASET *dset, int *err)
  619 {
  620     int ret = 0;
  621 
  622     if (!dataset_is_seasonal(dset)) {
  623     if (err != NULL) {
  624         *err = E_PDWRONG;
  625     }
  626     return 0;
  627     }
  628 
  629     if (dataset_is_weekly(dset)) {
  630     /* bodge -- what else to do? */
  631     ret = t % dset->pd;
  632     } else if (calendar_data(dset)) {
  633     /* dated daily data */
  634     char datestr[12];
  635 
  636     calendar_date_string(datestr, t, dset);
  637     ret = weekday_from_date(datestr);
  638     } else if (dataset_is_daily(dset)) {
  639     /* bodge, again */
  640     ret = t % dset->pd;
  641     } else {
  642     /* quarterly, monthly, hourly... */
  643     double x = date_as_double(t, dset->pd, dset->sd0);
  644     int i, d = ceil(log10(dset->pd));
  645 
  646     x -= floor(x);
  647     for (i=0; i<d; i++) {
  648         x *= 10;
  649     }
  650     ret = xround(x) - 1;
  651     }
  652 
  653     return ret;
  654 }
  655 
  656 /**
  657  * get_precision:
  658  * @x: data array.
  659  * @n: length of @x.
  660  * @placemax: the maximum number of decimal places to try.
  661  *
  662  * Find the number of decimal places required to represent a given
  663  * data series uniformly and accurately, if possible.
  664  *
  665  * Returns: the required number of decimal places or
  666  * #PMAX_NOT_AVAILABLE if it can't be done.
  667  */
  668 
  669 int get_precision (const double *x, int n, int placemax)
  670 {
  671     int t, p, pmax = 0;
  672     char *s, numstr[64];
  673     int len, n_ok = 0;
  674     double z;
  675 
  676     for (t=0; t<n; t++) {
  677     if (!na(x[t])) {
  678         z = fabs(x[t]);
  679         /* escape clause: numbers are too big or too small for
  680            this treatment */
  681         if (z > 0 && (z < 1.0e-6 || z > 1.0e+8)) {
  682         return PMAX_NOT_AVAILABLE;
  683         }
  684         n_ok++;
  685     }
  686     }
  687 
  688     if (n_ok == 0) {
  689     return PMAX_NOT_AVAILABLE;
  690     }
  691 
  692     for (t=0; t<n; t++) {
  693     if (!na(x[t])) {
  694         p = placemax;
  695         sprintf(numstr, "%.*f", p, fabs(x[t]));
  696         /* go to the end and drop trailing zeros */
  697         len = strlen(numstr);
  698         s = numstr + len - 1;
  699         while (*s-- == '0') {
  700         p--;
  701         len--;
  702         }
  703         if (len > 10) {
  704         /* this is going to be too big */
  705         return PMAX_NOT_AVAILABLE;
  706         }
  707         if (p > pmax) {
  708         pmax = p;
  709         }
  710     }
  711     }
  712 
  713     return pmax;
  714 }
  715 
  716 gretlopt data_save_opt_from_suffix (const char *fname)
  717 {
  718     gretlopt opt = OPT_NONE;
  719 
  720     if (has_suffix(fname, ".R")) {
  721     opt = OPT_R;
  722     } else if (has_suffix(fname, ".m")) {
  723     opt = OPT_M;
  724     } else if (has_suffix(fname, ".csv") ||
  725            has_suffix(fname, ".txt") ||
  726            has_suffix(fname, ".asc")) {
  727     opt = OPT_C;
  728     }
  729 
  730     return opt;
  731 }
  732 
/* Pairing of a data file type with the filename extension
   that identifies it */

struct extmap {
    GretlFileType ftype; /* file type code */
    const char *ext;     /* extension, including the leading dot */
};

/* Lookup table consulted by data_file_type_from_extension().
   Note that the ".txt" and ".asc" extensions are not listed
   here: that function handles them separately.
*/

static struct extmap data_ftype_map[] = {
    { GRETL_XML_DATA,     ".gdt" },
    { GRETL_BINARY_DATA,  ".gdtb" },
    { GRETL_CSV,          ".csv" },
    { GRETL_OCTAVE,       ".m" },
    { GRETL_GNUMERIC,     ".gnumeric" },
    { GRETL_XLS,          ".xls" },
    { GRETL_XLSX,         ".xlsx" },
    { GRETL_ODS,          ".ods" },
    { GRETL_WF1,          ".wf1" },
    { GRETL_DTA,          ".dta" },
    { GRETL_SAV,          ".sav" },
    { GRETL_SAS,          ".xpt" },
    { GRETL_JMULTI,       ".dat" }
};
  753 
  754 static const char *get_filename_extension (const char *fname)
  755 {
  756     const char *ext = strrchr(fname, '.');
  757 
  758     if (ext != NULL && strchr(ext, '/')) {
  759     /* the rightmost dot is not in the basename */
  760     ext = NULL;
  761     }
  762 
  763 #ifdef WIN32
  764     if (ext != NULL && strchr(ext, '\\')) {
  765     ext = NULL;
  766     }
  767 #endif
  768 
  769     return ext;
  770 }
  771 
  772 static GretlFileType data_file_type_from_extension (const char *ext)
  773 {
  774     int i, n = G_N_ELEMENTS(data_ftype_map);
  775 
  776     for (i=0; i<n; i++) {
  777     if (!g_ascii_strcasecmp(ext, data_ftype_map[i].ext)) {
  778         return data_ftype_map[i].ftype;
  779     }
  780     }
  781 
  782     /* a few extras */
  783     if (!g_ascii_strcasecmp(ext, ".txt") ||
  784     !g_ascii_strcasecmp(ext, ".asc")) {
  785     return GRETL_CSV;
  786     }
  787 
  788     return GRETL_UNRECOGNIZED;
  789 }
  790 
  791 GretlFileType data_file_type_from_name (const char *fname)
  792 {
  793     const char *ext = strrchr(fname, '.');
  794 
  795     if (ext != NULL && strchr(ext, '/')) {
  796     /* the rightmost dot is not in the basename */
  797     ext = NULL;
  798     }
  799 
  800 #ifdef WIN32
  801     if (ext != NULL && strchr(ext, '\\')) {
  802     ext = NULL;
  803     }
  804 #endif
  805 
  806     if (ext != NULL) {
  807     return data_file_type_from_extension(ext);
  808     }
  809 
  810     return GRETL_UNRECOGNIZED;
  811 }
  812 
  813 #define non_native(o) (o & (OPT_M | OPT_R | OPT_C | OPT_D | OPT_G | OPT_J))
  814 
  815 static GretlDataFormat
  816 format_from_opt_or_name (gretlopt opt, const char *fname,
  817              char *delim, int *add_ext,
  818              int *err)
  819 {
  820     GretlDataFormat fmt = GRETL_FMT_GDT;
  821 
  822     if (has_suffix(fname, ".gdt")) {
  823     if (non_native(opt)) {
  824         *err = E_BADOPT;
  825     }
  826     return GRETL_FMT_GDT;
  827     } else if (has_suffix(fname, ".gdtb")) {
  828     if (non_native(opt)) {
  829         *err = E_BADOPT;
  830     }
  831     return GRETL_FMT_BINARY;
  832     }
  833 
  834     if (opt & OPT_M) {
  835     fmt = GRETL_FMT_OCTAVE;
  836     } else if (opt & OPT_R) {
  837     fmt = GRETL_FMT_R;
  838     } else if (opt & OPT_C) {
  839     fmt = GRETL_FMT_CSV;
  840     } else if (opt & OPT_D) {
  841     fmt = GRETL_FMT_DB;
  842     } else if (opt & OPT_G) {
  843     fmt = GRETL_FMT_DAT;
  844     } else if (opt & OPT_J) {
  845     fmt = GRETL_FMT_JM;
  846     }
  847 
  848     if (fmt == GRETL_FMT_GDT) {
  849     if (has_suffix(fname, ".R")) {
  850         fmt = GRETL_FMT_R;
  851     } else if (has_suffix(fname, ".csv")) {
  852         fmt = GRETL_FMT_CSV;
  853     } else if (has_suffix(fname, ".m")) {
  854         fmt = GRETL_FMT_OCTAVE;
  855     } else if (has_suffix(fname, ".txt") ||
  856            has_suffix(fname, ".asc")) {
  857         fmt = GRETL_FMT_CSV;
  858         *delim = ' ';
  859     } else if (has_suffix(fname, ".dta")) {
  860         fmt = GRETL_FMT_DTA;
  861     } else if (has_suffix(fname, ".bin")) {
  862         fmt = GRETL_FMT_DB;
  863     }
  864     }
  865 
  866     if (fmt == GRETL_FMT_GDT) {
  867     *add_ext = 1;
  868     }
  869 
  870     return fmt;
  871 }
  872 
  873 void date_maj_min (int t, const DATASET *dset, int *maj, int *min)
  874 {
  875     char obs[OBSLEN];
  876 
  877     ntodate(obs, t, dset);
  878 
  879     if (maj != NULL) {
  880     *maj = atoi(obs);
  881     }
  882 
  883     if (min != NULL) {
  884     char *s, sep = ':';
  885 
  886     if (strchr(obs, sep) == NULL) {
  887         if (dset->pd == 4 && strchr(obs, 'Q')) {
  888         sep = 'Q';
  889         } else if (dset->pd == 12 && strchr(obs, 'M')) {
  890         sep = 'M';
  891         }
  892     }
  893 
  894     s = strchr(obs, sep);
  895     if (s != NULL && strlen(s) > 1) {
  896         *min = atoi(s + 1);
  897     } else {
  898         *min = 1;
  899     }
  900     }
  901 }
  902 
  903 #define NO_PMAX(p,k) (p == NULL || p[k-1] == PMAX_NOT_AVAILABLE)
  904 
  905 #define TMPLEN 64
  906 
  907 static void csv_data_out (const DATASET *dset, const int *list,
  908               int print_obs, int digits, char decpoint,
  909               char delim, FILE *fp)
  910 {
  911     const char *NA = get_csv_na_write_string();
  912     char tmp[TMPLEN];
  913     double xt;
  914     int popit = 0, dotsub = 0;
  915     int t, i, vi;
  916 
  917     if (decpoint == '.' && get_local_decpoint() == ',') {
  918     gretl_push_c_numeric_locale();
  919     popit = 1;
  920     } else if (decpoint == ',' && get_local_decpoint() == '.') {
  921     dotsub = 1;
  922     }
  923 
  924     for (t=dset->t1; t<=dset->t2; t++) {
  925     if (print_obs) {
  926         if (dset->S != NULL) {
  927         fprintf(fp, "\"%s\"%c", dset->S[t], delim);
  928         } else {
  929         ntodate(tmp, t, dset);
  930         if (quarterly_or_monthly(dset)) {
  931             modify_date_for_csv(tmp, dset->pd);
  932         }
  933         fprintf(fp, "%s%c", tmp, delim);
  934         }
  935     }
  936 
  937     for (i=1; i<=list[0]; i++) {
  938         vi = list[i];
  939         xt = dset->Z[vi][t];
  940         if (na(xt)) {
  941         fputs(NA, fp);
  942         } else {
  943         if (is_string_valued(dset, vi)) {
  944             const char *st;
  945 
  946             st = series_get_string_for_obs(dset, vi, t);
  947             if (st != NULL) {
  948             *tmp = '\0';
  949             strcat(tmp, "\"");
  950             strncat(tmp, st, TMPLEN - 3);
  951             strcat(tmp, "\"");
  952             } else {
  953             fprintf(stderr, "missing string at t=%d, vi=%d, xt=%g\n",
  954                 t, vi, xt);
  955             strcpy(tmp, "\"NA\"");
  956             }
  957         } else if (series_is_coded(dset, vi)) {
  958             sprintf(tmp, "\"%d\"", (int) xt);
  959         } else {
  960             sprintf(tmp, "%.*g", digits, xt);
  961         }
  962         if (dotsub) {
  963             gretl_charsub(tmp, '.', ',');
  964         }
  965         fputs(tmp, fp);
  966         }
  967         fputc(i < list[0] ? delim : '\n', fp);
  968     }
  969     }
  970 
  971     if (popit) {
  972     gretl_pop_c_numeric_locale();
  973     }
  974 }
  975 
  976 static int markers_are_unique (const DATASET *dset)
  977 {
  978     int t, s;
  979 
  980     for (t=dset->t1; t<dset->t2; t++) {
  981     for (s=t+1; s<=dset->t2; s++) {
  982         if (strcmp(dset->S[t], dset->S[s]) == 0) {
  983         return 0;
  984         }
  985     }
  986     }
  987 
  988     return 1;
  989 }
  990 
  991 static void R_data_out (const DATASET *dset, const int *list,
  992             int digits, FILE *fp)
  993 {
  994     int print_markers = 0;
  995     double xt;
  996     int t, i, vi;
  997 
  998     if (dset->S != NULL) {
  999     print_markers = markers_are_unique(dset);
 1000     }
 1001 
 1002     for (t=dset->t1; t<=dset->t2; t++) {
 1003     if (print_markers) {
 1004         fprintf(fp, "\"%s\" ", dset->S[t]);
 1005     }
 1006     for (i=1; i<=list[0]; i++) {
 1007         vi = list[i];
 1008         xt = dset->Z[vi][t];
 1009         if (na(xt)) {
 1010         fputs("NA", fp);
 1011         } else if (is_string_valued(dset, vi)) {
 1012         fprintf(fp, "\"%s\"", series_get_string_for_obs(dset, vi, t));
 1013         } else if (series_is_coded(dset, vi)) {
 1014         fprintf(fp, "\"%d\"", (int) xt);
 1015         } else {
 1016         fprintf(fp, "%.*g", digits, xt);
 1017         }
 1018         fputc(i < list[0] ? ' ' : '\n', fp);
 1019     }
 1020     }
 1021 }
 1022 
 1023 static int write_dta_data (const char *fname, const int *list,
 1024                gretlopt opt, const DATASET *dset)
 1025 {
 1026     int (*exporter) (const char *, const int *, gretlopt,
 1027              const DATASET *);
 1028     int err = 0;
 1029 
 1030     exporter = get_plugin_function("stata_export");
 1031 
 1032     if (exporter == NULL) {
 1033         err = 1;
 1034     } else {
 1035     err = (*exporter)(fname, list, opt, dset);
 1036     }
 1037 
 1038     return err;
 1039 }
 1040 
 1041 #define DEFAULT_CSV_DIGITS 15
 1042 
 1043 static int real_write_data (const char *fname, int *list,
 1044                 const DATASET *dset, gretlopt opt,
 1045                 int progress, PRN *prn)
 1046 {
 1047     int i, t, v, l0;
 1048     GretlDataFormat fmt;
 1049     char datfile[MAXLEN];
 1050     int n = dset->n;
 1051     int pop_locale = 0;
 1052     char delim = 0;
 1053     FILE *fp = NULL;
 1054     int freelist = 0;
 1055     int csv_digits = 0;
 1056     int add_ext = 0;
 1057     double xx;
 1058     int err = 0;
 1059 
 1060     gretl_error_clear();
 1061 
 1062     if (list != NULL && list[0] == 0) {
 1063     return E_ARGS;
 1064     }
 1065 
 1066     fmt = format_from_opt_or_name(opt, fname, &delim, &add_ext, &err);
 1067     if (err) {
 1068     return err;
 1069     }
 1070 
 1071     if (list == NULL) {
 1072     list = full_var_list(dset, &l0);
 1073     if (l0 == 0) {
 1074         return E_ARGS;
 1075     } else if (list == NULL) {
 1076         return E_ALLOC;
 1077     } else {
 1078         freelist = 1;
 1079     }
 1080     }
 1081 
 1082     l0 = list[0];
 1083     fname = gretl_maybe_switch_dir(fname);
 1084 
 1085     if (fmt == GRETL_FMT_GDT || fmt == GRETL_FMT_BINARY) {
 1086     /* write native data file (.gdt or .gdtb) */
 1087     err = gretl_write_gdt(fname, list, dset, opt, progress);
 1088     goto write_exit;
 1089     }
 1090 
 1091     if (fmt == GRETL_FMT_DB) {
 1092     /* native type database file */
 1093     err = write_db_data(fname, list, opt, dset);
 1094     goto write_exit;
 1095     }
 1096 
 1097     if (fmt == GRETL_FMT_DTA) {
 1098     /* Stata */
 1099     err = write_dta_data(fname, list, opt, dset);
 1100     goto write_exit;
 1101     }
 1102 
 1103     strcpy(datfile, fname);
 1104 
 1105     /* open file for output */
 1106     fp = gretl_fopen(datfile, "w");
 1107     if (fp == NULL) {
 1108     err = E_FOPEN;
 1109     goto write_exit;
 1110     }
 1111 
 1112     csv_digits = libset_get_int(CSV_DIGITS);
 1113 
 1114     if (csv_digits <= 0) {
 1115     csv_digits = DEFAULT_CSV_DIGITS;
 1116     }
 1117 
 1118     if (fmt != GRETL_FMT_CSV) {
 1119     /* ensure C locale for data output */
 1120     gretl_push_c_numeric_locale();
 1121     pop_locale = 1;
 1122     }
 1123 
 1124     if (fmt == GRETL_FMT_CSV) {
 1125     const char *msg = get_optval_string(STORE, OPT_E);
 1126     char decpoint = get_data_export_decpoint();
 1127     int print_obs = 0;
 1128 
 1129     if (opt & OPT_I) {
 1130         /* the CSV --decimal-comma option */
 1131         decpoint = ',';
 1132         delim = ';';
 1133     } else if (delim == 0) {
 1134         delim = get_data_export_delimiter();
 1135     }
 1136 
 1137     if (msg != NULL && *msg != '\0') {
 1138         fprintf(fp, "# %s\n", msg);
 1139     }
 1140 
 1141     if (!(opt & OPT_X)) {
 1142         /* OPT_X prohibits printing of observation strings */
 1143         print_obs = dataset_is_time_series(dset) || dset->S != NULL;
 1144     }
 1145 
 1146     if (!(opt & OPT_N)) {
 1147         /* header: variable names */
 1148         if (print_obs && (dset->S != NULL || dset->structure != CROSS_SECTION)) {
 1149         fprintf(fp, "obs%c", delim);
 1150         }
 1151         for (i=1; i<l0; i++) {
 1152         fprintf(fp, "%s%c", dset->varname[list[i]], delim);
 1153         }
 1154         fprintf(fp, "%s\n", dset->varname[list[l0]]);
 1155     }
 1156 
 1157     csv_data_out(dset, list, print_obs, csv_digits,
 1158              decpoint, delim, fp);
 1159     } else if (fmt == GRETL_FMT_R) {
 1160     /* friendly to GNU R */
 1161     if (dataset_is_time_series(dset)) {
 1162         char datestr[OBSLEN];
 1163 
 1164         ntodate(datestr, dset->t1, dset);
 1165         fprintf(fp, "# time-series data: start = %s, frequency = %d\n",
 1166             datestr, dset->pd);
 1167     }
 1168 
 1169     for (i=1; i<l0; i++) {
 1170         fprintf(fp, "%s ", dset->varname[list[i]]);
 1171     }
 1172     fprintf(fp, "%s\n", dset->varname[list[l0]]);
 1173 
 1174     R_data_out(dset, list, csv_digits, fp);
 1175     } else if (fmt == GRETL_FMT_OCTAVE) {
 1176     /* GNU Octave: write out data as several matrices (one per
 1177        series) in the same file */
 1178 
 1179     for (i=1; i<=list[0]; i++) {
 1180         v = list[i];
 1181         fprintf(fp, "# name: %s\n# type: matrix\n# rows: %d\n# columns: 1\n",
 1182             dset->varname[v], n);
 1183         for (t=dset->t1; t<=dset->t2; t++) {
 1184         xx = dset->Z[v][t];
 1185         if (na(xx)) {
 1186             fputs("NaN ", fp);
 1187         } else {
 1188             fprintf(fp, "%.*g\n", csv_digits, xx);
 1189         }
 1190         }
 1191     }
 1192     } else if (fmt == GRETL_FMT_DAT) {
 1193     /* PcGive: data file with load info */
 1194     int pd = dset->pd;
 1195 
 1196     for (i=1; i<=list[0]; i++) {
 1197         fprintf(fp, ">%s ", dset->varname[list[i]]);
 1198         if (dset->structure == TIME_SERIES &&
 1199         (pd == 1 || pd == 4 || pd == 12)) {
 1200         int maj, min;
 1201 
 1202         date_maj_min(dset->t1, dset, &maj, &min);
 1203         fprintf(fp, "%d %d ", maj, min);
 1204         date_maj_min(dset->t2, dset, &maj, &min);
 1205         fprintf(fp, "%d %d %d", maj, min, pd);
 1206         } else {
 1207         fprintf(fp, "%d 1 %d 1 1", dset->t1, dset->t2);
 1208         }
 1209 
 1210         fputc('\n', fp);
 1211 
 1212         for (t=dset->t1; t<=dset->t2; t++) {
 1213         v = list[i];
 1214         xx = dset->Z[v][t];
 1215         if (na(xx)) {
 1216             fprintf(fp, "-9999.99");
 1217         } else {
 1218             fprintf(fp, "%.*g", csv_digits, xx);
 1219         }
 1220         fputc('\n', fp);
 1221         }
 1222         fputc('\n', fp);
 1223     }
 1224     } else if (fmt == GRETL_FMT_JM) {
 1225     /* JMulti: ascii with comments and date info */
 1226     const char *vlabel;
 1227     int maj, min;
 1228 
 1229     fputs("/*\n", fp);
 1230     for (i=1; i<=list[0]; i++) {
 1231         v = list[i];
 1232         vlabel = series_get_label(dset, v);
 1233         fprintf(fp, " %s: %s\n", dset->varname[v],
 1234             vlabel == NULL ? "" : vlabel);
 1235     }
 1236     fputs("*/\n", fp);
 1237     date_maj_min(dset->t1, dset, &maj, &min);
 1238     if (dset->pd == 4 || dset->pd == 12) {
 1239         fprintf(fp, "<%d %c%d>\n", maj, (dset->pd == 4)? 'Q' : 'M', min);
 1240     } else if (dset->pd == 1) {
 1241         fprintf(fp, "<%d>\n", maj);
 1242     } else {
 1243         fputs("<1>\n", fp);
 1244     }
 1245     for (i=1; i<=list[0]; i++) {
 1246         v = list[i];
 1247         fprintf(fp, " %s", dset->varname[v]);
 1248     }
 1249     fputc('\n', fp);
 1250     for (t=dset->t1; t<=dset->t2; t++) {
 1251         for (i=1; i<=list[0]; i++) {
 1252         v = list[i];
 1253         if (na(dset->Z[v][t])) {
 1254             fputs("NaN ", fp);
 1255         } else {
 1256             fprintf(fp, "%.*g ", csv_digits, dset->Z[v][t]);
 1257         }
 1258         }
 1259         fputc('\n', fp);
 1260     }
 1261     }
 1262 
 1263     if (pop_locale) {
 1264     gretl_pop_c_numeric_locale();
 1265     }
 1266 
 1267     if (fp != NULL) {
 1268     fclose(fp);
 1269     }
 1270 
 1271  write_exit:
 1272 
 1273     if (!err && prn != NULL) {
 1274     if (add_ext) {
 1275         pprintf(prn, _("wrote %s.gdt\n"), fname);
 1276     } else {
 1277         pprintf(prn, _("wrote %s\n"), fname);
 1278     }
 1279     }
 1280 
 1281     if (freelist) {
 1282     free(list);
 1283     }
 1284 
 1285     return err;
 1286 }
 1287 
 1288 /**
 1289  * write_data:
 1290  * @fname: name of file to write.
 1291  * @list: list of variables to write (or %NULL to write all series).
 1292  * @dset: dataset struct.
 1293  * @opt: option flag indicating format in which to write the data.
 1294  * @prn: gretl printer or NULL.
 1295  *
 1296  * Write out a data file containing the values of the given set
 1297  * of variables.
 1298  *
 1299  * Returns: 0 on successful completion, non-zero on error.
 1300  */
 1301 
 1302 int write_data (const char *fname, int *list, const DATASET *dset,
 1303         gretlopt opt, PRN *prn)
 1304 {
 1305     return real_write_data(fname, list, dset, opt, 0, prn);
 1306 }
 1307 
 1308 int gui_write_data (const char *fname, int *list, const DATASET *dset,
 1309             gretlopt opt)
 1310 {
 1311     return real_write_data(fname, list, dset, opt, 1, NULL);
 1312 }
 1313 
 1314 /**
 1315  * is_gzipped:
 1316  * @fname: filename to examine.
 1317  *
 1318  * Determine if the given file is gzipped.
 1319  *
 1320  * Returns: 1 in case of a gzipped file, 0 if not gzipped or
 1321  * inaccessible.
 1322  *
 1323  */
 1324 
 1325 int is_gzipped (const char *fname)
 1326 {
 1327     FILE *fp;
 1328     int gz = 0;
 1329 
 1330     if (fname == NULL || *fname == '\0') {
 1331     return 0;
 1332     }
 1333 
 1334     fp = gretl_fopen(fname, "rb");
 1335     if (fp == NULL) {
 1336     return 0;
 1337     }
 1338 
 1339     if (fgetc(fp) == 037 && fgetc(fp) == 0213) {
 1340     gz = 1;
 1341     }
 1342 
 1343     fclose(fp);
 1344 
 1345     return gz;
 1346 }
 1347 
 1348 /**
 1349  * gretl_get_data:
 1350  * @fname: name of file to try.
 1351  * @dset: dataset struct.
 1352  * @opt: option flags.
 1353  * @prn: where messages should be written.
 1354  *
 1355  * Read "native" data from file into gretl's work space,
 1356  * allocating space as required. This function handles
 1357  * both native XML data format and native binary format.
 1358  * It also handles incomplete information: it can perform
 1359  * path-searching on @fname, and will try adding the .gdt
 1360  * or .gdtb extension to @fname if this is not given.
 1361  *
 1362  * Note that a more straightforward function for reading a
 1363  * native gretl data file, given the correct path, is
 1364  * gretl_read_gdt().
 1365  *
 1366  * The only applicable option is that @opt may contain
 1367  * OPT_T when appending data to a panel dataset: in
 1368  * that case we try to interpret the new data as time
 1369  * series, in common across all panel units. In most
 1370  * cases, just give OPT_NONE.
 1371  *
 1372  * Returns: 0 on successful completion, non-zero otherwise.
 1373  */
 1374 
 1375 int gretl_get_data (char *fname, DATASET *dset,
 1376             gretlopt opt, PRN *prn)
 1377 {
 1378     gretlopt append_opt = OPT_NONE;
 1379     int gdtsuff;
 1380     char *test;
 1381     int err = 0;
 1382 
 1383     gretl_error_clear();
 1384 
 1385 #if 0
 1386     fprintf(stderr, "gretl_get_data: calling addpath\n");
 1387 #endif
 1388 
 1389     test = gretl_addpath(fname, 0);
 1390     if (test == NULL) {
 1391     return E_FOPEN;
 1392     }
 1393 
 1394     gdtsuff = has_native_data_suffix(fname);
 1395 
 1396     if (opt & OPT_T) {
 1397     append_opt = OPT_T;
 1398     }
 1399 
 1400     if (gdtsuff) {
 1401     /* specific processing for gretl datafiles  */
 1402     err = gretl_read_gdt(fname, dset, append_opt, prn);
 1403     } else {
 1404     /* try fallback to a "csv"-type import */
 1405     err = import_csv(fname, dset, append_opt, prn);
 1406     }
 1407 
 1408     return err;
 1409 }
 1410 
 1411 /**
 1412  * open_nulldata:
 1413  * @dset: dataset struct.
 1414  * @data_status: indicator for whether a data file is currently open
 1415  * in gretl's work space (1) or not (0).
 1416  * @length: desired length of data series.
 1417  * @opt: may contain OPT_N to suppress addition of an index series.
 1418  * @prn: gretl printing struct.
 1419  *
 1420  * Create an empty "dummy" data set, suitable for simulations.
 1421  *
 1422  * Returns: 0 on successful completion, non-zero otherwise.
 1423  *
 1424  */
 1425 
 1426 int open_nulldata (DATASET *dset, int data_status, int length,
 1427            gretlopt opt, PRN *prn)
 1428 {
 1429     int t;
 1430 
 1431     /* clear any existing data info */
 1432     if (data_status) {
 1433     clear_datainfo(dset, CLEAR_FULL);
 1434     }
 1435 
 1436     /* dummy up the data info */
 1437     dset->n = length;
 1438     dset->v = (opt & OPT_N)? 1 : 2;
 1439     dataset_obs_info_default(dset);
 1440 
 1441     if (dataset_allocate_varnames(dset)) {
 1442     return E_ALLOC;
 1443     }
 1444 
 1445     /* allocate data storage */
 1446     if (allocate_Z(dset, 0)) {
 1447     return E_ALLOC;
 1448     }
 1449 
 1450     if (dset->v > 1) {
 1451     /* add an index var */
 1452     strcpy(dset->varname[1], "index");
 1453     series_set_label(dset, 1, _("index variable"));
 1454     for (t=0; t<dset->n; t++) {
 1455         dset->Z[1][t] = (double) (t + 1);
 1456     }
 1457     }
 1458 
 1459     if (prn != NULL && gretl_messages_on()) {
 1460     /* print basic info */
 1461     pprintf(prn, A_("periodicity: %d, maxobs: %d\n"
 1462             "observations range: %s to %s\n"),
 1463         dset->pd, dset->n, dset->stobs, dset->endobs);
 1464     }
 1465 
 1466     /* Set sample range to entire length of data-set by default */
 1467     dset->t1 = 0;
 1468     dset->t2 = dset->n - 1;
 1469 
 1470     return 0;
 1471 }
 1472 
 1473 static int extend_markers (DATASET *dset, int old_n, int new_n)
 1474 {
 1475     char **S = realloc(dset->S, new_n * sizeof *S);
 1476     int t, err = 0;
 1477 
 1478     if (S == NULL) {
 1479     err = 1;
 1480     } else {
 1481     dset->S = S;
 1482     for (t=old_n; t<new_n && !err; t++) {
 1483         S[t] = malloc(OBSLEN);
 1484         if (S[t] == NULL) {
 1485         err = 1;
 1486         }
 1487     }
 1488     }
 1489 
 1490     return err;
 1491 }
 1492 
 1493 static void merge_error (const char *msg, PRN *prn)
 1494 {
 1495     pputs(prn, msg);
 1496     if (!printing_to_standard_stream(prn)) {
 1497     gretl_errmsg_set(msg);
 1498     }
 1499 }
 1500 
 1501 static void merge_name_error (const char *objname, PRN *prn)
 1502 {
 1503     gchar *msg;
 1504 
 1505     msg = g_strdup_printf("Can't replace %s with a series", objname);
 1506     pprintf(prn, "%s\n", msg);
 1507     if (!printing_to_standard_stream(prn)) {
 1508     gretl_errmsg_set(msg);
 1509     }
 1510     g_free(msg);
 1511 }
 1512 
 1513 static int count_new_vars (const DATASET *d1, const DATASET *d2,
 1514                PRN *prn)
 1515 {
 1516     const char *vname;
 1517     int addvars = d2->v - 1;
 1518     int i, j;
 1519 
 1520     /* We start by assuming that all the series in @d2 are new,
 1521        then subtract those we find to be already present. We also
 1522        check for collision between the names of series to be added and
 1523        the names of existing objects other than series.
 1524     */
 1525 
 1526     for (i=1; i<d2->v && addvars >= 0; i++) {
 1527     vname = d2->varname[i];
 1528     if (gretl_is_user_var(vname)) {
 1529         merge_name_error(vname, prn);
 1530         addvars = -1;
 1531     } else if (gretl_function_depth() > 0) {
 1532         if (current_series_index(d1, vname) > 0) {
 1533         addvars--;
 1534         }
 1535     } else {
 1536         for (j=1; j<d1->v; j++) {
 1537         if (!strcmp(vname, d1->varname[j])) {
 1538             addvars--;
 1539             break;
 1540         }
 1541         }
 1542     }
 1543     }
 1544 
 1545 #if MERGE_DEBUG
 1546     if (gretl_function_depth() == 0) {
 1547     int found;
 1548 
 1549     for (i=1; i<d2->v; i++) {
 1550         found = 0;
 1551         for (j=1; j<d1->v && !found; j++) {
 1552         if (!strcmp(d2->varname[i], d1->varname[j])) {
 1553             found = 1;
 1554         }
 1555         }
 1556         if (!found) {
 1557         fprintf(stderr, "'%s' in import but not current dataset\n",
 1558             d2->varname[i]);
 1559         }
 1560     }
 1561     }
 1562 #endif
 1563 
 1564     return addvars;
 1565 }
 1566 
 1567 static int year_special_markers (const DATASET *dset,
 1568                  const DATASET *addset)
 1569 {
 1570     char *test;
 1571     int overlap = 0;
 1572     int i, t, err = 0;
 1573 
 1574     /* See if we can match obs markers in @addset
 1575        against years in @dset: we'll try this if all
 1576        the markers in addset are integer strings, at
 1577        least some of them are within the obs range of
 1578        @dset, and none of them are outside of the
 1579        "sanity" range of 1 to 2500.
 1580     */
 1581 
 1582     if (!dataset_is_time_series(dset) || dset->pd != 1) {
 1583     return 0;
 1584     }
 1585 
 1586     if (dset->markers || !addset->markers) {
 1587     return 0;
 1588     }
 1589 
 1590     errno = 0;
 1591 
 1592     for (i=0; i<addset->n; i++) {
 1593     t = strtol(addset->S[i], &test, 10);
 1594     if (*test || errno) {
 1595         errno = 0;
 1596         err = 1;
 1597         break;
 1598     }
 1599     if (t < 1 || t > 2500) {
 1600         err = 1;
 1601         break;
 1602     }
 1603     if (!overlap) {
 1604         t = dateton(addset->S[i], dset);
 1605         if (t >= 0 && t < dset->n) {
 1606         overlap = 1;
 1607         }
 1608     }
 1609     }
 1610 
 1611     return !err && overlap;
 1612 }
 1613 
 1614 static int compare_ranges (const DATASET *targ,
 1615                const DATASET *src,
 1616                int newvars,
 1617                int *offset,
 1618                int *yrspecial,
 1619                int *err)
 1620 {
 1621     int ed0 = dateton(targ->endobs, targ);
 1622     int sd1, ed1, addobs = -1;
 1623     int range_err = 0;
 1624 
 1625     if (dataset_is_cross_section(targ) &&
 1626     dataset_is_cross_section(src) &&
 1627     !(targ->markers && src->markers)) {
 1628     if (newvars == 0) {
 1629         if (src->markers) {
 1630         /* pass the problem on to just_append_rows */
 1631         return 0;
 1632         } else {
 1633         /* assume the new data should be appended length-wise */
 1634         *offset = ed0 + 1;
 1635         return src->n;
 1636         }
 1637     } else {
 1638         /* we've already determined that the series length in
 1639            @src doesn't match either the full series length or
 1640            the current sample range in @targ; we therefore have
 1641            no information with which to match rows for new
 1642            series
 1643         */
 1644         gretl_errmsg_set(_("append: don't know how to align the new series!"));
 1645         *err = E_DATA;
 1646         return -1;
 1647     }
 1648     }
 1649 
 1650     sd1 = merge_dateton(src->stobs, targ);
 1651     ed1 = merge_dateton(src->endobs, targ);
 1652 
 1653 #if DATES_DEBUG
 1654     fprintf(stderr, "compare_ranges:\n"
 1655         " targ->n = %d, src->n = %d\n"
 1656         " targ->stobs = '%s', src->stobs = '%s'\n"
 1657         " sd1 = %d, ed1 = %d\n",
 1658         targ->n, src->n, targ->stobs, src->stobs,
 1659         sd1, ed1);
 1660 #endif
 1661 
 1662     if (sd1 < 0) {
 1663     /* case: new data start earlier than old */
 1664     if (ed1 < 0) {
 1665         range_err = 1;
 1666     } else if (ed1 > ed0) {
 1667         range_err = 2;
 1668     } else {
 1669         *offset = sd1;
 1670         addobs = 0;
 1671     }
 1672     } else if (sd1 == 0 && ed1 == ed0) {
 1673     /* case: exact match of ranges */
 1674     *offset = 0;
 1675     addobs = 0;
 1676     } else if (sd1 == 0) {
 1677     /* case: starting obs the same */
 1678     *offset = 0;
 1679     if (ed1 > ed0) {
 1680         addobs = ed1 - ed0;
 1681     } else {
 1682         addobs = 0;
 1683     }
 1684     } else if (sd1 == ed0 + 1) {
 1685     /* case: new data start right after end of old */
 1686     *offset = sd1;
 1687     addobs = src->n;
 1688     } else if (sd1 > 0) {
 1689     /* case: new data start later than old */
 1690     if (sd1 <= ed0) {
 1691         /* but there's some overlap */
 1692         *offset = sd1;
 1693         if (ed1 > ed0) {
 1694         addobs = ed1 - ed0;
 1695         } else {
 1696         addobs = 0;
 1697         }
 1698     }
 1699     }
 1700 
 1701     if (range_err) {
 1702     /* try another approach? */
 1703     *yrspecial = year_special_markers(targ, src);
 1704     if (*yrspecial) {
 1705         addobs = 0;
 1706     }
 1707     }
 1708 
 1709     if (addobs < 0) {
 1710     if (range_err == 1) {
 1711         fputs("compare_ranges: no overlap, can't merge\n", stderr);
 1712     } else if (range_err == 2) {
 1713         fputs("compare ranges: new data start earlier, end later\n", stderr);
 1714     } else {
 1715         fputs("compare_ranges: flagging error\n", stderr);
 1716     }
 1717     }
 1718 
 1719     return addobs;
 1720 }
 1721 
 1722 /* Determine whether there's any overlap between the calendar
 1723    in @addset and that in @dset. Return 0 on success (there is
 1724    an overlap), non-zero otherwise.
 1725 */
 1726 
 1727 static int check_for_overlap (const DATASET *dset,
 1728                   const DATASET *addset,
 1729                   int *offset)
 1730 {
 1731     int at1 = merge_dateton(addset->stobs, dset);
 1732     int at2 = merge_dateton(addset->endobs, dset);
 1733 
 1734     if (!(at1 >= dset->n) && !(at2 < 0)) {
 1735     /* OK, there must be some overlap */
 1736     *offset = at1;
 1737     return 0;
 1738     } else {
 1739     /* either the "add" data start after the original data end,
 1740        or they end before the originals start, no there's no
 1741        overlap
 1742     */
 1743     gretl_errmsg_set("No overlap in data ranges");
 1744     return E_DATA;
 1745     }
 1746 }
 1747 
 1748 /* When appending data to a current panel dataset, and the length of
 1749    the series in the new data is less than the full panel size
 1750    (n * T), try to determine if it's OK to expand the incoming data to
 1751    match.
 1752 
 1753    We'll say it's OK if the new series length equals the panel T: in
 1754    that case we'll take the new data to be time-series, which should
 1755    be replicated for each panel unit.
 1756 
 1757    A second possibility arises if the length of the new series
 1758    equals the panel n: in that case we could treat it as a time-
 1759    invariant characteristic of the panel unit, which should be
 1760    replicated for each time period.  But note that if OPT_T is
 1761    given, this second expansion is forbidden: the user has
 1762    stipulated that the new data are time-varying.
 1763 */
 1764 
 1765 static int panel_expand_ok (DATASET *dset, DATASET *addinfo,
 1766                 gretlopt opt)
 1767 {
 1768     int n = dset->n / dset->pd;
 1769     int T = dset->pd;
 1770     int ok = 0;
 1771 
 1772     if (addinfo->n == T) {
 1773     ok = 1;
 1774     } else if (!(opt & OPT_T) &&
 1775            addinfo->n == n &&
 1776            addinfo->pd == 1) {
 1777     ok = 1;
 1778     }
 1779 
 1780     return ok;
 1781 }
 1782 
 1783 static int panel_append_special (int addvars,
 1784                  DATASET *dset,
 1785                  DATASET *addset,
 1786                  gretlopt opt,
 1787                  PRN *prn)
 1788 {
 1789     int n = dset->n / dset->pd;
 1790     int T = dset->pd;
 1791     int k = dset->v;
 1792     int tsdata;
 1793     int i, j, s, p, t;
 1794     int err = 0;
 1795 
 1796     if (addvars > 0 && dataset_add_series(dset, addvars)) {
 1797     merge_error(_("Out of memory!\n"), prn);
 1798     err = E_ALLOC;
 1799     }
 1800 
 1801     tsdata = ((opt & OPT_T) || addset->n != n);
 1802 
 1803     for (i=1; i<addset->v && !err; i++) {
 1804     int v = series_index(dset, addset->varname[i]);
 1805 
 1806     if (v >= k) {
 1807         /* a new variable */
 1808         v = k++;
 1809         strcpy(dset->varname[v], addset->varname[i]);
 1810         copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
 1811     }
 1812 
 1813     s = 0;
 1814     for (j=0; j<n; j++) {
 1815         /* loop across units */
 1816         for (t=0; t<T; t++) {
 1817         /* loop across periods */
 1818         p = (tsdata)? t : j;
 1819         dset->Z[v][s++] = addset->Z[i][p];
 1820         }
 1821     }
 1822     }
 1823 
 1824     return err;
 1825 }
 1826 
 1827 static int markers_compatible (const DATASET *d1, DATASET *d2,
 1828                    int *offset)
 1829 {
 1830     int ret = 0;
 1831 
 1832     if (d1->markers == 0 && d2->markers == 0) {
 1833     *offset = d1->n;
 1834     ret = 1;
 1835     } else if (d1->markers == 0) {
 1836     /* markers "on the right only": are they consecutive
 1837        integers starting between 1 and d1->n + 1?
 1838     */
 1839     if (integer_string(d2->S[0])) {
 1840         int k0 = atoi(d2->S[0]);
 1841 
 1842         if (k0 >= 1 && k0 <= d1->n + 1) {
 1843         int i, k1;
 1844 
 1845         ret = 1;
 1846         for (i=1; i<d2->n && ret; i++) {
 1847             if (!integer_string(d2->S[i])) {
 1848             ret = 0;
 1849             } else if ((k1 = atoi(d2->S[i])) != k0 + 1) {
 1850             ret = 0;
 1851             } else {
 1852             k0 = k1;
 1853             }
 1854         }
 1855         }
 1856         if (ret) {
 1857         *offset = atoi(d2->S[0]) - 1;
 1858         /* the @d2 markers have done their job -- yielding
 1859            an @offset value -- and they can now be trashed
 1860         */
 1861         dataset_destroy_obs_markers(d2);
 1862         }
 1863     }
 1864     } else {
 1865     /* markers on both sides: are they totally distinct? */
 1866     int i, j;
 1867 
 1868     ret = 1;
 1869     for (i=0; i<d2->n && ret; i++) {
 1870         for (j=0; j<d1->n && ret; j++) {
 1871         if (!strcmp(d2->S[i], d1->S[j])) {
 1872             /* no, not totally distinct */
 1873             ret = 0;
 1874         }
 1875         }
 1876     }
 1877     }
 1878 
 1879 #if MERGE_DEBUG
 1880     fprintf(stderr, " markers_compatible: ret=%d, offset=%d\n", ret, *offset);
 1881 #endif
 1882 
 1883     return ret;
 1884 }
 1885 
 1886 static int
 1887 just_append_rows (const DATASET *targ, DATASET *src, int *offset)
 1888 {
 1889     int ret = 0;
 1890 
 1891     if (targ->structure == CROSS_SECTION &&
 1892     src->structure == CROSS_SECTION &&
 1893     targ->sd0 == 1 && src->sd0 == 1) {
 1894     int ok, test_offset = -1;
 1895 
 1896     ok = markers_compatible(targ, src, &test_offset);
 1897     if (ok) {
 1898         /* note: we do this only if we're not adding any new
 1899            series: we'll append to existing series lengthwise
 1900            (or perhaps write data into existing existing rows)
 1901         */
 1902         *offset = test_offset;
 1903         ret = src->n - (targ->n - *offset);
 1904         if (ret < 0) {
 1905         ret = 0;
 1906         }
 1907     }
 1908     }
 1909 
 1910     return ret;
 1911 }
 1912 
 1913 static int simple_range_match (const DATASET *targ, const DATASET *src,
 1914                    int *offset)
 1915 {
 1916     int ret = 0;
 1917 
 1918     if (src->pd == 1 && src->structure == CROSS_SECTION) {
 1919     if (src->n == targ->n) {
 1920         ret = 1;
 1921     } else if (src->n == targ->t2 - targ->t1 + 1) {
 1922         ret = 1;
 1923         *offset = targ->t1;
 1924     }
 1925     }
 1926 
 1927     return ret;
 1928 }
 1929 
 1930 static int merge_lengthen_series (DATASET *dset,
 1931                   const DATASET *addset,
 1932                   int addobs,
 1933                   int offset)
 1934 {
 1935     int i, t, new_n = dset->n + addobs;
 1936     int err = 0;
 1937 
 1938     if (dset->markers) {
 1939     err = extend_markers(dset, dset->n, new_n);
 1940     if (!err) {
 1941         for (t=dset->n; t<new_n; t++) {
 1942         strcpy(dset->S[t], addset->S[t-offset]);
 1943         }
 1944     }
 1945     }
 1946 
 1947     for (i=0; i<dset->v && !err; i++) {
 1948     double *x;
 1949 
 1950     x = realloc(dset->Z[i], new_n * sizeof *x);
 1951     if (x == NULL) {
 1952         err = E_ALLOC;
 1953         break;
 1954     }
 1955 
 1956     for (t=dset->n; t<new_n; t++) {
 1957         if (i == 0) {
 1958         x[t] = 1.0;
 1959         } else {
 1960         x[t] = NADBL;
 1961         }
 1962     }
 1963     dset->Z[i] = x;
 1964     }
 1965 
 1966     if (!err) {
 1967     dset->n = new_n;
 1968     ntodate(dset->endobs, new_n - 1, dset);
 1969     dset->t2 = dset->n - 1;
 1970     }
 1971 
 1972     return err;
 1973 }
 1974 
 1975 #if 0 /* not yet (maybe usable with DND */
 1976 
 1977 int basic_data_merge_check (const DATASET *dset,
 1978                 DATASET *addset)
 1979 {
 1980     int dayspecial = 0;
 1981     int yrspecial = 0;
 1982     int addsimple = 0;
 1983     int addvars = 0;
 1984     int addobs = 0;
 1985     int offset = 0;
 1986     int err = 0;
 1987 
 1988     /* first see how many new vars we have */
 1989     addvars = count_new_vars(dset, addset, NULL);
 1990     if (addvars < 0) {
 1991     return 1;
 1992     }
 1993 
 1994     if (dated_daily_data(dset) && dated_daily_data(addset)) {
 1995     dayspecial = 1;
 1996     }
 1997 
 1998     if (simple_range_match(dset, addset, &offset)) {
 1999     addsimple = 1;
 2000     } else if (dset->pd != addset->pd) {
 2001     err = 1;
 2002     }
 2003 
 2004     if (!err) {
 2005     if (!addsimple) {
 2006         addobs = compare_ranges(dset, addset, addvars, &offset,
 2007                     &yrspecial, &err);
 2008     }
 2009     if (!err && addobs <= 0 && addvars == 0) {
 2010         addobs = just_append_rows(dset, addset, &offset);
 2011     }
 2012     }
 2013 
 2014     if (!err && (addobs < 0 || addvars < 0)) {
 2015     err = E_DATA;
 2016     }
 2017 
 2018     if (!err && dset->markers != addset->markers) {
 2019     if (addobs == 0 && addvars == 0) {
 2020         err = E_DATA;
 2021     } else if (addset->n != dset->n && !yrspecial && !dayspecial) {
 2022         err = E_DATA;
 2023     }
 2024     }
 2025 
 2026     return err;
 2027 }
 2028 
 2029 #endif
 2030 
 2031 #define simple_structure(p) (p->structure == TIME_SERIES ||     \
 2032                  p->structure == SPECIAL_TIME_SERIES || \
 2033                  (p->structure == CROSS_SECTION &&      \
 2034                   p->S == NULL))
 2035 
/**
 * merge_data:
 * @dset: dataset struct.
 * @addset: dataset to be merged in.
 * @opt: may include OPT_T to force a time-series interpretation
 * when appending to a panel dataset; may include OPT_U to update
 * values of overlapping observations; may include OPT_X to keep
 * the sample range fixed (in which case the overlap between the
 * two datasets is checked but no observations are added).
 * @prn: print struct to accept messages.
 *
 * Attempt to merge the content of a newly opened data file into
 * gretl's current working data set. The work is done in two
 * phases: first a series of conformability checks which determine
 * how many series (addvars) and observations (addobs) are to be
 * added and at what offset; then, if the checks pass, the actual
 * lengthening/widening of @dset and the copying of values.
 *
 * Returns: 0 on successful completion, non-zero otherwise.
 */

static int merge_data (DATASET *dset, DATASET *addset,
                       gretlopt opt, PRN *prn)
{
    int update_overlap = (opt & OPT_U);
    int orig_n = dset->n; /* length of @dset before any rows are added */
    int dayspecial = 0;   /* both datasets hold dated daily data */
    int yrspecial = 0;    /* set by compare_ranges() */
    int fixsample = 0;    /* OPT_X given */
    int addsimple = 0;    /* simple "sideways" merge */
    int addpanel = 0;     /* special panel append */
    int addvars = 0;
    int addobs = 0;
    int offset = 0;
    int err = 0;

#if MERGE_DEBUG
    debug_print_option_flags("merge_data", opt);
#endif

    /* first see how many new vars we have */
    addvars = count_new_vars(dset, addset, prn);
    if (addvars < 0) {
        return 1;
    }

#if MERGE_DEBUG
    fprintf(stderr, " new series count = %d\n", addvars);
#endif

    if (dated_daily_data(dset) && dated_daily_data(addset)) {
#if MERGE_DEBUG
        fprintf(stderr, " special: merging daily data\n");
#endif
        dayspecial = 1;
    }

    /* decide which kind of merge we're attempting; the cases
       are tried in order and the first that applies wins */
    if (opt & OPT_X) {
        fixsample = 1;
    } else if (simple_range_match(dset, addset, &offset)) {
        /* we'll allow undated data to be merged with the existing
           dataset, sideways, provided the number of observations
           matches OK */
        addsimple = 1;
    } else if (dataset_is_panel(dset) &&
               panel_expand_ok(dset, addset, opt)) {
        /* allow appending to panel when the number of obs matches
           either the cross-section size or the time-series length
        */
        addpanel = 1;
    } else if (dset->pd != addset->pd) {
        merge_error(_("Data frequency does not match\n"), prn);
        err = 1;
    }

    if (!err && fixsample) {
        /* fixed sample: addobs stays at zero */
        err = check_for_overlap(dset, addset, &offset);
    } else if (!err && gretl_function_depth() > 0) {
        /* we won't add observations within a function, but
           we should still check for an error from compare_ranges()
        */
        if (!addsimple && !addpanel) {
            addobs = compare_ranges(dset, addset, addvars, &offset,
                                    &yrspecial, &err);
            if (!err && addobs > 0) {
                addobs = 0;
            }
        }
    } else if (!err) {
        if (!addsimple && !addpanel) {
            addobs = compare_ranges(dset, addset, addvars, &offset,
                                    &yrspecial, &err);
#if MERGE_DEBUG
            fprintf(stderr, " added obs, from compare_ranges: %d\n", addobs);
#endif
        }
        if (!err && addobs <= 0 && addvars == 0) {
            /* nothing new found so far: fall back on treating the
               new data as extra rows */
            addobs = just_append_rows(dset, addset, &offset);
#if MERGE_DEBUG
            fprintf(stderr, " added obs, from just_append_rows: %d\n", addobs);
#endif
        }
    }

    if (!err && (addobs < 0 || addvars < 0)) {
        merge_error(_("New data not conformable for appending\n"), prn);
        err = E_DATA;
    }

    /* handle the case where the two datasets differ in respect
       of their observation-markers status */
    if (!err && !addpanel && dset->markers != addset->markers) {
        if (addobs == 0 && addvars == 0) {
            if (update_overlap) {
                ; /* might be OK? */
            } else {
                gretl_errmsg_set("Found no data conformable for appending");
                err = E_DATA;
            }
        } else if (addset->n != dset->n && !yrspecial && !dayspecial) {
            merge_error(_("Inconsistency in observation markers\n"), prn);
            err = E_DATA;
        } else if (addset->markers && !dset->markers &&
                   !yrspecial && !dayspecial) {
            /* the target has no markers: drop those of @addset */
            dataset_destroy_obs_markers(addset);
        }
    }

#if MERGE_DEBUG
    if (!err) {
        fprintf(stderr, " after preliminaries: addvars = %d, addobs = %d\n",
                addvars, addobs);
    } else {
        fprintf(stderr, " after preliminaries: err = %d\n", err);
    }
#endif

    /* if checks are passed, try merging the data */

    if (!err && addobs > 0) {
        err = merge_lengthen_series(dset, addset, addobs, offset);
        if (err) {
            merge_error(_("Out of memory!\n"), prn);
        }
    }

    if (!err && addpanel) {
        err = panel_append_special(addvars, dset, addset,
                                   opt, prn);
    } else if (!err) {
        /* k: the series count before any new series are added; a
           lookup result >= k is taken to indicate a new series,
           and k then serves as the next free slot */
        int k = dset->v;
        int i, t;

        if (addvars > 0 && dataset_add_series(dset, addvars)) {
            merge_error(_("Out of memory!\n"), prn);
            err = E_ALLOC;
        }

        for (i=1; i<addset->v && !err; i++) {
            int v = series_index(dset, addset->varname[i]);
            int tmin, newvar = v >= k;

            /* for a pre-existing series, unless we're updating
               overlapping observations, write only to rows beyond
               the original length of @dset */
            if (!newvar && !update_overlap) {
                tmin = orig_n;
            } else {
                tmin = 0;
            }

            if (newvar) {
                v = k++;
                strcpy(dset->varname[v], addset->varname[i]);
                copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
                if (is_string_valued(addset, i) &&
                    addset->n == dset->n && offset == 0 &&
                    addobs == 0) {
                    /* attach the string table to the target
                       series and detach it from @addset
                    */
                    series_table *st;

                    st = series_get_string_table(addset, i);
                    series_attach_string_table(dset, v, st);
                    series_attach_string_table(addset, i, NULL);
                }
            } else {
                /* not a new series */
                int lsval = is_string_valued(dset, v);
                int rsval = is_string_valued(addset, i);

                if (lsval + rsval == 1) {
                    /* exactly one of the pair is string-valued */
                    gretl_errmsg_set(_("Can't concatenate string-valued and numeric series"));
                    err = E_DATA;
                } else if (lsval) {
                    err = merge_string_tables(dset, v, addset, i);
                }
            }

            if (dayspecial) {
                /* match observations by date string: look up each
                   target date in @addset */
                char obs[OBSLEN];
                int s;

                for (t=tmin; t<dset->n; t++) {
                    ntodate(obs, t, dset);
                    s = dateton(obs, addset);
                    if (s >= 0 && s < addset->n) {
                        dset->Z[v][t] = addset->Z[i][s];
                    } else {
                        dset->Z[v][t] = NADBL;
                    }
                }
            } else if (yrspecial) {
                /* match via the marker strings of @addset, looked
                   up as dates in @dset */
                int s;

                if (newvar) {
                    for (t=0; t<dset->n; t++) {
                        dset->Z[v][t] = NADBL;
                    }
                }
                for (s=0; s<addset->n; s++) {
                    t = dateton(addset->S[s], dset);
                    if (t >= tmin && t < dset->n) {
                        dset->Z[v][t] = addset->Z[i][s];
                    }
                }
            } else {
                /* plain positional copy, shifted by @offset; rows
                   of a new series not covered by @addset get NA */
                for (t=tmin; t<dset->n; t++) {
                    if (t >= offset && t - offset < addset->n) {
                        dset->Z[v][t] = addset->Z[i][t - offset];
                    } else if (newvar) {
                        dset->Z[v][t] = NADBL;
                    }
                }
            }
        }
    }

    if (!err && (addvars || addobs) && gretl_messages_on()) {
        pputs(prn, _("Data appended OK\n"));
    }

    return err;
}
 2270 
 2271 /* We want to ensure that calendar dates are recorded as per
 2272    ISO 8601 -- that is, YYYY-MM-DD; here we remedy dates
 2273    recorded in the form YYYY/MM/DD.
 2274 */
 2275 
 2276 static void maybe_fix_calendar_dates (DATASET *dset)
 2277 {
 2278     if (strchr(dset->stobs, '/') != NULL) {
 2279     gretl_charsub(dset->stobs, '/', '-');
 2280     gretl_charsub(dset->endobs, '/', '-');
 2281     if (dset->S != NULL && dset->markers == DAILY_DATE_STRINGS) {
 2282         int t;
 2283 
 2284         for (t=0; t<dset->n; t++) {
 2285         gretl_charsub(dset->S[t], '/', '-');
 2286         }
 2287     }
 2288     }
 2289 }
 2290 
 2291 /**
 2292  * get_merge_opts:
 2293  * @opt: gretl options flags.
 2294  *
 2295  * Returns: just those components of @opt (if any) that
 2296  * can be passed to merge_or_replace_data(); may be
 2297  * useful when calling that function in the context
 2298  * of a command only some of whose options should be
 2299  * forwarded.
 2300  */
 2301 
 2302 gretlopt get_merge_opts (gretlopt opt)
 2303 {
 2304     gretlopt merge_opt = OPT_NONE;
 2305 
 2306     if (opt & OPT_T) {
 2307     /* panel, common time-series */
 2308     merge_opt |= OPT_T;
 2309     }
 2310     if (opt & OPT_U) {
 2311     /* update overlapping observations */
 2312     merge_opt |= OPT_U;
 2313     }
 2314     if (opt & OPT_X) {
 2315     /* fixed sample range */
 2316     merge_opt |= OPT_X;
 2317     }
 2318 
 2319     return merge_opt;
 2320 }
 2321 
 2322 /**
 2323  * merge_or_replace_data:
 2324  * @dset0: original dataset struct.
 2325  * @pdset1: new dataset struct.
 2326  * @opt: zero or more option flags (OPT_K presrves @pdset1,
 2327  * otherwise it is destroyed).
 2328  * @prn: print struct to accept messages.
 2329  *
 2330  * Given a newly-created dataset, pointed to by @pdset1, either
 2331  * attempt to merge it with @dset0, if the original data array
 2332  * is non-NULL, or replace the content of the original pointer
 2333  * with the new dataset.
 2334  *
 2335  * In case merging is not successful, the new dataset is
 2336  * destroyed.
 2337  *
 2338  * Returns: 0 on successful completion, non-zero otherwise.
 2339  */
 2340 
 2341 int merge_or_replace_data (DATASET *dset0, DATASET **pdset1,
 2342                gretlopt opt, PRN *prn)
 2343 {
 2344     int keep = (opt & OPT_K);
 2345     int err = 0;
 2346 
 2347     if (dset0->Z != NULL) {
 2348     /* we have an existing dataset into which the new data
 2349        should be merged */
 2350     gretlopt merge_opt = OPT_NONE;
 2351 
 2352     if (opt & OPT_T) {
 2353         /* panel, common time-series */
 2354         merge_opt |= OPT_T;
 2355     }
 2356     if (opt & OPT_U) {
 2357         /* update overlapping observations */
 2358         merge_opt |= OPT_U;
 2359     }
 2360     if (opt & OPT_X) {
 2361         /* fixed sample range */
 2362         merge_opt |= OPT_X;
 2363     }
 2364     err = merge_data(dset0, *pdset1, merge_opt, prn);
 2365     if (!keep) {
 2366         destroy_dataset(*pdset1);
 2367     }
 2368     } else {
 2369     /* starting from scratch */
 2370     *dset0 = **pdset1;
 2371     free(*pdset1);
 2372     if (calendar_data(dset0)) {
 2373         maybe_fix_calendar_dates(dset0);
 2374     }
 2375     }
 2376 
 2377     if (!keep) {
 2378     *pdset1 = NULL;
 2379     }
 2380 
 2381     return err;
 2382 }
 2383 
 2384 static int check_imported_string (char *src, int i, size_t len)
 2385 {
 2386     int err = 0;
 2387 
 2388     if (!g_utf8_validate(src, -1, NULL)) {
 2389     gchar *trstr = NULL;
 2390     gsize bytes;
 2391 
 2392     trstr = g_locale_to_utf8(src, -1, NULL, &bytes, NULL);
 2393 
 2394     if (trstr == NULL) {
 2395         gretl_errmsg_sprintf("Invalid characters in imported string, line %d", i);
 2396         err = E_DATA;
 2397     } else {
 2398         *src = '\0';
 2399         strncat(src, trstr, len - 1);
 2400         g_free(trstr);
 2401     }
 2402     }
 2403 
 2404     return err;
 2405 }
 2406 
 2407 static int count_markers (FILE *fp, char *line, int linelen,
 2408               char *marker)
 2409 {
 2410     int n = 0;
 2411 
 2412     while (fgets(line, linelen, fp)) {
 2413     if (sscanf(line, "%31[^\n\r]", marker) == 1) {
 2414         g_strstrip(marker);
 2415         if (*marker != '\0') {
 2416         n++;
 2417         }
 2418     }
 2419     }
 2420 
 2421     rewind(fp);
 2422 
 2423     return n;
 2424 }
 2425 
 2426 /**
 2427  * add_obs_markers_from_file:
 2428  * @dset: data information struct.
 2429  * @fname: name of file containing case markers.
 2430  *
 2431  * Read case markers (strings of %OBSLEN - 1 characters or less that identify
 2432  * the observations) from a file, and associate them with the
 2433  * current data set.  The file should contain one marker per line,
 2434  * with a number of lines equal to the number of observations in
 2435  * the current data set.
 2436  *
 2437  * Returns: 0 on successful completion, non-zero otherwise.
 2438  */
 2439 
 2440 int add_obs_markers_from_file (DATASET *dset, const char *fname)
 2441 {
 2442     char **S = NULL;
 2443     FILE *fp;
 2444     char line[128], marker[32];
 2445     int done = 0;
 2446     int t, err = 0;
 2447 
 2448     fp = gretl_fopen(fname, "r");
 2449     if (fp == NULL) {
 2450     return E_FOPEN;
 2451     }
 2452 
 2453     S = strings_array_new_with_length(dset->n, OBSLEN);
 2454     if (S == NULL) {
 2455     fclose(fp);
 2456     return E_ALLOC;
 2457     }
 2458 
 2459     if (dataset_is_panel(dset)) {
 2460     /* allow the case where we get just enough markers to
 2461        label the cross-sectional units */
 2462     int nm = count_markers(fp, line, sizeof line, marker);
 2463     int N = dset->n / dset->pd; /* = number of units */
 2464 
 2465     if (nm == N) {
 2466         int T = dset->pd;
 2467         int t, i = 0;
 2468 
 2469         while (fgets(line, sizeof line, fp) && !err) {
 2470         *marker = '\0';
 2471         if (sscanf(line, "%31[^\n\r]", marker) == 1) {
 2472             g_strstrip(marker);
 2473             strncat(S[i], marker, OBSLEN - 1);
 2474             err = check_imported_string(S[i], i+1, OBSLEN);
 2475             if (!err) {
 2476             /* copy to remaining observations */
 2477             for (t=1; t<T; t++) {
 2478                 strcpy(S[i+t], S[i]);
 2479             }
 2480             }
 2481             i += T;
 2482         }
 2483         }
 2484         done = 1;
 2485     }
 2486     }
 2487 
 2488     if (!done) {
 2489     for (t=0; t<dset->n && !err; t++) {
 2490         if (fgets(line, sizeof line, fp) == NULL) {
 2491         gretl_errmsg_sprintf("Expected %d markers; found %d\n",
 2492                      dset->n, t);
 2493         err = E_DATA;
 2494         } else if (sscanf(line, "%31[^\n\r]", marker) != 1) {
 2495         gretl_errmsg_sprintf("Couldn't read marker on line %d", t+1);
 2496         err = E_DATA;
 2497         } else {
 2498         g_strstrip(marker);
 2499         strncat(S[t], marker, OBSLEN - 1);
 2500         err = check_imported_string(S[t], t+1, OBSLEN);
 2501         }
 2502     }
 2503     }
 2504 
 2505     if (err) {
 2506     strings_array_free(S, dset->n);
 2507     } else {
 2508     if (dset->S != NULL) {
 2509         strings_array_free(dset->S, dset->n);
 2510     }
 2511     dset->markers = REGULAR_MARKERS;
 2512     dset->S = S;
 2513     }
 2514 
 2515     return err;
 2516 }
 2517 
 2518 /**
 2519  * dataset_has_var_labels:
 2520  * @dset: data information struct.
 2521  *
 2522  * Returns: 1 if at least one variable in the current dataset
 2523  * has a descriptive label, otherwise 0.
 2524  */
 2525 
 2526 int dataset_has_var_labels (const DATASET *dset)
 2527 {
 2528     const char *vlabel;
 2529     int i, imin = 1;
 2530 
 2531     if (dset->v > 1) {
 2532     if (!strcmp(dset->varname[1], "index")) {
 2533         vlabel = series_get_label(dset, 1);
 2534         if (vlabel != NULL && !strcmp(vlabel, _("index variable"))) {
 2535         imin = 2;
 2536         }
 2537     }
 2538     }
 2539 
 2540     for (i=imin; i<dset->v; i++) {
 2541     vlabel = series_get_label(dset, i);
 2542     if (vlabel != NULL && *vlabel != '\0') {
 2543         return 1;
 2544     }
 2545     }
 2546 
 2547     return 0;
 2548 }
 2549 
 2550 /**
 2551  * save_var_labels_to_file:
 2552  * @dset: data information struct.
 2553  * @fname: name of file containing labels.
 2554  *
 2555  * Writes to @fname the descriptive labels for the series in
 2556  * the current dataset.
 2557  *
 2558  * Returns: 0 on successful completion, non-zero otherwise.
 2559  */
 2560 
 2561 int save_var_labels_to_file (const DATASET *dset,
 2562                  const char *fname)
 2563 {
 2564     const char *vlabel;
 2565     FILE *fp;
 2566     int i, err = 0;
 2567 
 2568     fp = gretl_fopen(fname, "w");
 2569 
 2570     if (fp == NULL) {
 2571     err = E_FOPEN;
 2572     } else {
 2573     for (i=1; i<dset->v; i++) {
 2574         vlabel = series_get_label(dset, i);
 2575         fprintf(fp, "%s\n", vlabel == NULL ? "" : vlabel);
 2576     }
 2577     fclose(fp);
 2578     }
 2579 
 2580     return err;
 2581 }
 2582 
 2583 static int save_var_labels_to_array (const DATASET *dset,
 2584                      const char *aname)
 2585 {
 2586     gretl_array *a = NULL;
 2587     int err = 0;
 2588 
 2589     if (gretl_is_series(aname, dset)) {
 2590     err = E_TYPES;
 2591     } else {
 2592     err = check_identifier(aname);
 2593     }
 2594 
 2595     if (!err) {
 2596     a = gretl_array_new(GRETL_TYPE_STRINGS, dset->v - 1, &err);
 2597     }
 2598 
 2599     if (!err) {
 2600     err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
 2601     }
 2602 
 2603     if (!err) {
 2604     char *vlabel;
 2605     int i;
 2606 
 2607     for (i=1; i<dset->v; i++) {
 2608         vlabel = (char *) series_get_label(dset, i);
 2609         gretl_array_set_element(a, i-1, vlabel != NULL ? vlabel : "",
 2610                     GRETL_TYPE_STRING, 1);
 2611     }
 2612     }
 2613 
 2614     if (err && a != NULL) {
 2615     gretl_array_destroy(a);
 2616     a = NULL;
 2617     }
 2618 
 2619     return err;
 2620 }
 2621 
 2622 static int save_obs_markers_to_array (const DATASET *dset,
 2623                       const char *aname)
 2624 {
 2625     gretl_array *a = NULL;
 2626     int err = 0;
 2627 
 2628     if (gretl_is_series(aname, dset)) {
 2629     err = E_TYPES;
 2630     } else {
 2631     err = check_identifier(aname);
 2632     }
 2633 
 2634     if (!err) {
 2635     a = gretl_array_new(GRETL_TYPE_STRINGS, dset->n, &err);
 2636     }
 2637 
 2638     if (!err) {
 2639     err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
 2640     }
 2641 
 2642     if (!err) {
 2643     char *marker;
 2644     int i;
 2645 
 2646     for (i=0; i<dset->n; i++) {
 2647         marker = dset->S[i];
 2648         gretl_array_set_element(a, i, marker != NULL ? marker : "",
 2649                     GRETL_TYPE_STRING, 1);
 2650     }
 2651     }
 2652 
 2653     if (err && a != NULL) {
 2654     gretl_array_destroy(a);
 2655     a = NULL;
 2656     }
 2657 
 2658     return err;
 2659 }
 2660 
 2661 /**
 2662  * add_var_labels_from_file:
 2663  * @dset: data information struct.
 2664  * @fname: name of file containing labels.
 2665  *
 2666  * Read descriptive variables for labels (strings of %MAXLABEL - 1
 2667  * characters or less) from a file, and associate them with the
 2668  * current data set.  The file should contain one label per line,
 2669  * with a number of lines equal to the number of variables in
 2670  * the current data set, excluding the constant.
 2671  *
 2672  * Returns: 0 on successful completion, non-zero otherwise.
 2673  */
 2674 
 2675 int add_var_labels_from_file (DATASET *dset, const char *fname)
 2676 {
 2677     FILE *fp;
 2678     char line[1024];
 2679     gchar *label;
 2680     int nlabels = 0;
 2681     int i, err = 0;
 2682 
 2683     fp = gretl_fopen(fname, "r");
 2684     if (fp == NULL) {
 2685     return E_FOPEN;
 2686     }
 2687 
 2688     for (i=1; i<dset->v && !err; i++) {
 2689     if (fgets(line, sizeof line, fp) == NULL) {
 2690         break;
 2691     } else {
 2692         label = g_strstrip(g_strdup(line));
 2693         if (strlen(label) > 0) {
 2694         if (!g_utf8_validate(label, -1, NULL)) {
 2695             gchar *trstr = NULL;
 2696             gsize bytes;
 2697 
 2698             trstr = g_locale_to_utf8(label, -1, NULL,
 2699                          &bytes, NULL);
 2700             if (trstr != NULL) {
 2701             series_set_label(dset, i, trstr);
 2702             nlabels++;
 2703             g_free(trstr);
 2704             }
 2705         } else {
 2706             series_set_label(dset, i, label);
 2707             nlabels++;
 2708         }
 2709         }
 2710         g_free(label);
 2711     }
 2712     }
 2713 
 2714     if (!err && nlabels == 0) {
 2715     gretl_errmsg_set("No labels found");
 2716     err = E_DATA;
 2717     }
 2718 
 2719     return err;
 2720 }
 2721 
 2722 static int add_var_labels_from_array (DATASET *dset, const char *aname)
 2723 {
 2724     gretl_array *a = get_array_by_name(aname);
 2725     int i, err = 0;
 2726 
 2727     if (a == NULL) {
 2728     gretl_errmsg_sprintf("%s: no such array", aname);
 2729     err = E_DATA;
 2730     } else if (gretl_array_get_type(a) != GRETL_TYPE_STRINGS ||
 2731            gretl_array_get_length(a) < dset->v - 1) {
 2732     err = E_TYPES;
 2733     }
 2734 
 2735     for (i=1; i<dset->v && !err; i++) {
 2736     const char *s = gretl_array_get_data(a, i-1);
 2737 
 2738     series_set_label(dset, i, s);
 2739     }
 2740 
 2741     return err;
 2742 }
 2743 
 2744 int read_or_write_var_labels (gretlopt opt, DATASET *dset, PRN *prn)
 2745 {
 2746     const char *lname = NULL;
 2747     int err;
 2748 
 2749     err = incompatible_options(opt, OPT_D | OPT_T | OPT_F |
 2750                    OPT_A | OPT_R);
 2751     if (err) {
 2752     return err;
 2753     }
 2754 
 2755     if (opt & (OPT_T | OPT_F | OPT_A | OPT_R)) {
 2756     lname = get_optval_string(LABELS, opt);
 2757     if (lname == NULL) {
 2758         return E_BADOPT;
 2759     } else if (opt & (OPT_T | OPT_F)) {
 2760         gretl_maybe_switch_dir(lname);
 2761     }
 2762     }
 2763 
 2764     if (opt & OPT_D) {
 2765     /* delete */
 2766     int i;
 2767 
 2768     for (i=1; i<dset->v; i++) {
 2769         series_set_label(dset, i, "");
 2770     }
 2771     } else if (opt & (OPT_T | OPT_R)) {
 2772     /* to-file, to-array */
 2773     if (!dataset_has_var_labels(dset)) {
 2774         pprintf(prn, "No labels are available for writing\n");
 2775         err = E_DATA;
 2776     } else {
 2777         if (opt & OPT_T) {
 2778         err = save_var_labels_to_file(dset, lname);
 2779         } else {
 2780         err = save_var_labels_to_array(dset, lname);
 2781         }
 2782         if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
 2783         pprintf(prn, "Labels written OK\n");
 2784         }
 2785     }
 2786     } else if (opt & (OPT_F | OPT_A)) {
 2787     /* from-file, from-array */
 2788     if (opt & OPT_F) {
 2789         err = add_var_labels_from_file(dset, lname);
 2790     } else {
 2791         err = add_var_labels_from_array(dset, lname);
 2792     }
 2793     if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
 2794         pprintf(prn, "Labels loaded OK\n");
 2795     }
 2796     }
 2797 
 2798     return err;
 2799 }
 2800 
 2801 static int save_obs_markers_to_file (DATASET *dset, const char *fname)
 2802 {
 2803     FILE *fp = gretl_fopen(fname, "w");
 2804     int err = 0;
 2805 
 2806     if (fp == NULL) {
 2807     err = E_FOPEN;
 2808     } else {
 2809     int i;
 2810 
 2811     for (i=0; i<dset->n; i++) {
 2812         fprintf(fp, "%s\n", dset->S[i]);
 2813     }
 2814     fclose(fp);
 2815     }
 2816 
 2817     return err;
 2818 }
 2819 
 2820 int read_or_write_obs_markers (gretlopt opt, DATASET *dset, PRN *prn)
 2821 {
 2822     const char *fname = NULL;
 2823     int err;
 2824 
 2825     err = incompatible_options(opt, OPT_D | OPT_T | OPT_F);
 2826     if (err) {
 2827     return err;
 2828     }
 2829 
 2830     if (opt & (OPT_T | OPT_F)) {
 2831     fname = get_optval_string(MARKERS, opt);
 2832     if (fname == NULL) {
 2833         return E_BADOPT;
 2834     } else {
 2835         fname = gretl_maybe_switch_dir(fname);
 2836     }
 2837     }
 2838 
 2839     if (opt & (OPT_A | OPT_T)) {
 2840     /* writing to file or array */
 2841     if (dset->S == NULL) {
 2842         gretl_errmsg_set(_("No markers are available for writing"));
 2843         return E_DATA;
 2844     }
 2845     }
 2846 
 2847     if (opt & OPT_D) {
 2848     /* delete */
 2849     dataset_destroy_obs_markers(dset);
 2850     } else if (opt & OPT_T) {
 2851     /* to-file */
 2852     err = save_obs_markers_to_file(dset, fname);
 2853     if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
 2854         pprintf(prn, "Markers written OK\n");
 2855     }
 2856     } else if (opt & OPT_F) {
 2857     /* from-file */
 2858     err = add_obs_markers_from_file(dset, fname);
 2859     if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
 2860         pprintf(prn, "Markers loaded OK\n");
 2861     }
 2862     } else if (opt & OPT_A) {
 2863     /* to-array */
 2864     const char *aname = get_optval_string(MARKERS, OPT_A);
 2865 
 2866     err = save_obs_markers_to_array(dset, aname);
 2867     }
 2868 
 2869     return err;
 2870 }
 2871 
 2872 static void
 2873 octave_varname (char *name, const char *s, int nnum, int v)
 2874 {
 2875     char nstr[12];
 2876     int len, tr;
 2877 
 2878     if (nnum == 0) {
 2879     strcpy(name, s);
 2880     } else {
 2881     sprintf(nstr, "%d", nnum);
 2882     len = strlen(nstr);
 2883     tr = VNAMELEN - len;
 2884 
 2885     if (tr > 0) {
 2886         strncat(name, s, tr);
 2887         strcat(name, nstr);
 2888     } else {
 2889         sprintf(name, "v%d", v);
 2890     }
 2891     }
 2892 }
 2893 
 2894 static int get_max_line_length (FILE *fp, PRN *prn)
 2895 {
 2896     int c, c1, cc = 0;
 2897     int maxlen = 0;
 2898 
 2899     while ((c = fgetc(fp)) != EOF) {
 2900     if (c == 0x0d) {
 2901         /* CR */
 2902         c1 = fgetc(fp);
 2903         if (c1 == EOF) {
 2904         break;
 2905         } else if (c1 == 0x0a) {
 2906         /* CR + LF -> LF */
 2907         c = c1;
 2908         } else {
 2909         /* Mac-style: CR not followed by LF */
 2910         c = 0x0a;
 2911         ungetc(c1, fp);
 2912         }
 2913     }
 2914     if (c == 0x0a) {
 2915         if (cc > maxlen) {
 2916         maxlen = cc;
 2917         }
 2918         cc = 0;
 2919         continue;
 2920     }
 2921     if (!isspace((unsigned char) c) && !isprint((unsigned char) c) &&
 2922         !(c == CTRLZ)) {
 2923         pprintf(prn, A_("Binary data (%d) encountered: this is not a valid "
 2924                "text file\n"), c);
 2925         return -1;
 2926     }
 2927     cc++;
 2928     }
 2929 
 2930     if (maxlen == 0) {
 2931     pprintf(prn, A_("Data file is empty\n"));
 2932     }
 2933 
 2934     if (maxlen > 0) {
 2935     /* allow for newline and null terminator */
 2936     maxlen += 3;
 2937     }
 2938 
 2939     return maxlen;
 2940 }
 2941 
 2942 static int import_octave (const char *fname, DATASET *dset,
 2943               gretlopt opt, PRN *prn)
 2944 {
 2945     DATASET *octset = NULL;
 2946     FILE *fp = NULL;
 2947     char *line = NULL;
 2948     char tmp[8], fmt[16], name[32];
 2949     int nrows = 0, ncols = 0, nblocks = 0;
 2950     int brows = 0, bcols = 0, oldbcols = 0;
 2951     int maxlen, got_type = 0, got_name = 0;
 2952     int i, t, err = 0;
 2953 
 2954     fp = gretl_fopen(fname, "r");
 2955     if (fp == NULL) {
 2956     return E_FOPEN;
 2957     }
 2958 
 2959     pprintf(prn, "%s %s...\n", A_("parsing"), fname);
 2960 
 2961     maxlen = get_max_line_length(fp, prn);
 2962     if (maxlen <= 0) {
 2963     err = E_DATA;
 2964     goto oct_bailout;
 2965     }
 2966 
 2967     line = malloc(maxlen);
 2968     if (line == NULL) {
 2969     err = E_ALLOC;
 2970     goto oct_bailout;
 2971     }
 2972 
 2973     pprintf(prn, A_("   longest line: %d characters\n"), maxlen - 1);
 2974 
 2975     rewind(fp);
 2976 
 2977     while (fgets(line, maxlen, fp) && !err) {
 2978     if (*line == '#') {
 2979         if (!got_name) {
 2980         if (sscanf(line, "# name: %31s", name) == 1) {
 2981             got_name = 1;
 2982             nblocks++;
 2983             continue;
 2984         }
 2985         }
 2986         if (!got_type) {
 2987         if (sscanf(line, "# type: %7s", tmp) == 1) {
 2988             if (!got_name || strcmp(tmp, "matrix")) {
 2989             err = 1;
 2990             } else {
 2991             got_type = 1;
 2992             }
 2993             continue;
 2994         }
 2995         }
 2996         if (brows == 0) {
 2997         if (sscanf(line, "# rows: %d", &brows) == 1) {
 2998             if (!got_name || !got_type || brows <= 0) {
 2999             err = 1;
 3000             } else if (nrows > 0 && brows != nrows) {
 3001             err = 1;
 3002             } else {
 3003             nrows = brows;
 3004             }
 3005             continue;
 3006         }
 3007         }
 3008         if (bcols == 0) {
 3009         if (sscanf(line, "# columns: %d", &bcols) == 1) {
 3010             if (!got_name || !got_type || bcols <= 0) {
 3011             err = 1;
 3012             } else {
 3013             ncols += bcols;
 3014             pprintf(prn, A_("   Found matrix '%s' with "
 3015                     "%d rows, %d columns\n"), name, brows, bcols);
 3016             }
 3017             continue;
 3018         }
 3019         }
 3020     } else if (string_is_blank(line)) {
 3021         continue;
 3022     } else {
 3023         got_name = 0;
 3024         got_type = 0;
 3025         brows = 0;
 3026         bcols = 0;
 3027     }
 3028     }
 3029 
 3030     if (err || nrows == 0 || ncols == 0) {
 3031     pputs(prn, A_("Invalid data file\n"));
 3032     err = E_DATA;
 3033     goto oct_bailout;
 3034     }
 3035 
 3036     /* initialize datainfo and Z */
 3037 
 3038     octset = datainfo_new();
 3039     if (octset == NULL) {
 3040     pputs(prn, A_("Out of memory!\n"));
 3041     err = E_ALLOC;
 3042     goto oct_bailout;
 3043     }
 3044 
 3045     octset->n = nrows;
 3046     octset->v = ncols + 1;
 3047 
 3048     if (start_new_Z(octset, 0)) {
 3049     pputs(prn, A_("Out of memory!\n"));
 3050     err = E_ALLOC;
 3051     goto oct_bailout;
 3052     }
 3053 
 3054     rewind(fp);
 3055 
 3056     pprintf(prn, A_("   number of variables: %d\n"), ncols);
 3057     pprintf(prn, A_("   number of observations: %d\n"), nrows);
 3058     pprintf(prn, A_("   number of data blocks: %d\n"), nblocks);
 3059 
 3060     i = 1;
 3061     t = 0;
 3062 
 3063     sprintf(fmt, "# name: %%%ds", VNAMELEN - 1);
 3064 
 3065     while (fgets(line, maxlen, fp) && !err) {
 3066     char *s = line;
 3067     int j;
 3068 
 3069     if (*s == '#') {
 3070         if (sscanf(line, fmt, name) == 1) {
 3071         ;
 3072         } else if (sscanf(line, "# rows: %d", &brows) == 1) {
 3073         t = 0;
 3074         } else if (sscanf(line, "# columns: %d", &bcols) == 1) {
 3075         i += oldbcols;
 3076         oldbcols = bcols;
 3077         }
 3078     }
 3079 
 3080     if (*s == '#' || string_is_blank(s)) {
 3081         continue;
 3082     }
 3083 
 3084     if (t >= octset->n) {
 3085         err = 1;
 3086     }
 3087 
 3088     for (j=0; j<bcols && !err; j++) {
 3089         double x;
 3090         int v = i + j;
 3091 
 3092         if (t == 0) {
 3093         int nnum = (bcols > 1)? j + 1 : 0;
 3094 
 3095         octave_varname(octset->varname[i+j], name, nnum, v);
 3096         }
 3097 
 3098         while (isspace(*s)) s++;
 3099         if (sscanf(s, "%lf", &x) != 1) {
 3100         fprintf(stderr, "error: '%s', didn't get double\n", s);
 3101         err = 1;
 3102         } else {
 3103         octset->Z[v][t] = x;
 3104         while (!isspace(*s)) s++;
 3105         }
 3106     }
 3107     t++;
 3108     }
 3109 
 3110     if (err) {
 3111     pputs(prn, A_("Invalid data file\n"));
 3112     err = E_DATA;
 3113     } else {
 3114     int merge = dset->Z != NULL;
 3115     gretlopt merge_opt = 0;
 3116 
 3117     if (merge && (opt & OPT_T)) {
 3118         merge_opt = OPT_T;
 3119     }
 3120     err = merge_or_replace_data(dset, &octset, merge_opt, prn);
 3121     }
 3122 
 3123  oct_bailout:
 3124 
 3125     if (fp != NULL) {
 3126     fclose(fp);
 3127     }
 3128 
 3129     if (line != NULL) {
 3130     free(line);
 3131     }
 3132 
 3133     if (octset != NULL) {
 3134     clear_datainfo(octset, CLEAR_FULL);
 3135     }
 3136 
 3137     return err;
 3138 }
 3139 
 3140 /**
 3141  * import_other:
 3142  * @fname: name of file.
 3143  * @ftype: type of data file.
 3144  * @dset: pointer to dataset struct.
 3145  * @opt: option flag; see gretl_get_data().
 3146  * @prn: gretl printing struct.
 3147  *
 3148  * Open a data file of a type that requires a special plugin.
 3149  *
 3150  * Returns: 0 on successful completion, non-zero otherwise.
 3151  */
 3152 
 3153 int import_other (const char *fname, GretlFileType ftype,
 3154           DATASET *dset, gretlopt opt, PRN *prn)
 3155 {
 3156     FILE *fp;
 3157     int (*importer) (const char *, DATASET *,
 3158              gretlopt, PRN *);
 3159     int err = 0;
 3160 
 3161     set_alt_gettext_mode(prn);
 3162 
 3163     fp = gretl_fopen(fname, "r");
 3164     if (fp == NULL) {
 3165     pprintf(prn, A_("Couldn't open %s\n"), fname);
 3166     err = E_FOPEN;
 3167     goto bailout;
 3168     }
 3169 
 3170     fclose(fp);
 3171 
 3172     if (ftype == GRETL_OCTAVE) {
 3173     /* plugin not needed */
 3174     return import_octave(fname, dset, opt, prn);
 3175     }
 3176 
 3177     if (ftype == GRETL_WF1) {
 3178     importer = get_plugin_function("wf1_get_data");
 3179     } else if (ftype == GRETL_DTA) {
 3180     importer = get_plugin_function("dta_get_data");
 3181     } else if (ftype == GRETL_SAV) {
 3182     importer = get_plugin_function("sav_get_data");
 3183     } else if (ftype == GRETL_SAS) {
 3184     importer = get_plugin_function("xport_get_data");
 3185     } else if (ftype == GRETL_JMULTI) {
 3186     importer = get_plugin_function("jmulti_get_data");
 3187     } else {
 3188     pprintf(prn, A_("Unrecognized data type"));
 3189     pputc(prn, '\n');
 3190     return E_DATA;
 3191     }
 3192 
 3193     if (importer == NULL) {
 3194         err = 1;
 3195     } else {
 3196     err = (*importer)(fname, dset, opt, prn);
 3197     }
 3198 
 3199  bailout:
 3200 
 3201     return err;
 3202 }
 3203 
 3204 /**
 3205  * import_spreadsheet:
 3206  * @fname: name of file.
 3207  * @ftype: type of data file.
 3208  * @list: list of parameters for spreadsheet import, or NULL.
 3209  * @sheetname: name of specific worksheet, or NULL.
 3210  * @dset: dataset struct.
 3211  * @opt: option flag; see gretl_get_data().
 3212  * @prn: gretl printing struct.
 3213  *
 3214  * Open a data file of a type that requires a special plugin.
 3215  * Acceptable values for @ftype are %GRETL_GNUMERIC,
 3216  * %GRETL_XLS, %GRETL_XLSX and %GRETL_ODS.
 3217  *
 3218  * Returns: 0 on successful completion, non-zero otherwise.
 3219  */
 3220 
 3221 int import_spreadsheet (const char *fname, GretlFileType ftype,
 3222             int *list, char *sheetname,
 3223             DATASET *dset, gretlopt opt, PRN *prn)
 3224 {
 3225     FILE *fp;
 3226     int (*importer) (const char*, int *, char *,
 3227              DATASET *, gretlopt, PRN *);
 3228     int err = 0;
 3229 
 3230     import_na_init();
 3231     set_alt_gettext_mode(prn);
 3232 
 3233     fp = gretl_fopen(fname, "r");
 3234 
 3235     if (fp == NULL) {
 3236     pprintf(prn, A_("Couldn't open %s\n"), fname);
 3237     err = E_FOPEN;
 3238     goto bailout;
 3239     }
 3240 
 3241     fclose(fp);
 3242 
 3243     if (ftype == GRETL_GNUMERIC) {
 3244     importer = get_plugin_function("gnumeric_get_data");
 3245     } else if (ftype == GRETL_XLS) {
 3246     importer = get_plugin_function("xls_get_data");
 3247     } else if (ftype == GRETL_XLSX) {
 3248     importer = get_plugin_function("xlsx_get_data");
 3249     } else if (ftype == GRETL_ODS) {
 3250     importer = get_plugin_function("ods_get_data");
 3251     } else {
 3252     pprintf(prn, A_("Unrecognized data type"));
 3253     pputc(prn, '\n');
 3254     return E_DATA;
 3255     }
 3256 
 3257     if (importer == NULL) {
 3258         err = 1;
 3259     } else {
 3260     gchar *thisdir = g_get_current_dir();
 3261 
 3262     err = (*importer)(fname, list, sheetname, dset, opt, prn);
 3263 
 3264     if (thisdir != NULL) {
 3265         /* come back out of dotdir? */
 3266         gretl_chdir(thisdir);
 3267         g_free(thisdir);
 3268     }
 3269     }
 3270 
 3271  bailout:
 3272 
 3273     return err;
 3274 }
 3275 
 3276 static int is_jmulti_datafile (const char *fname)
 3277 {
 3278     FILE *fp;
 3279     int ret = 0;
 3280 
 3281     fp = gretl_fopen(fname, "r");
 3282 
 3283     if (fp != NULL) {
 3284     char test[128] = {0};
 3285     int gotobs = 0;
 3286     int gotcomm = 0;
 3287     int incomm = 0;
 3288 
 3289     /* look for characteristic C-style comment and
 3290        <obs stuff> field, outside of comment */
 3291 
 3292     while (fgets(test, sizeof test, fp)) {
 3293         if (!incomm && strstr(test, "/*")) {
 3294         gotcomm = 1;
 3295         incomm = 1;
 3296         }
 3297         if (incomm && strstr(test, "*/")) {
 3298         incomm = 0;
 3299         }
 3300         if (!incomm && *test == '<' && strchr(test, '>')) {
 3301         gotobs = 1;
 3302         }
 3303         if (gotcomm && gotobs) {
 3304         ret = 1;
 3305         break;
 3306         }
 3307     }
 3308     fclose(fp);
 3309     }
 3310 
 3311     return ret;
 3312 }
 3313 
 3314 /**
 3315  * gretl_is_pkzip_file:
 3316  * @fname: name of file to examine.
 3317  *
 3318  * Returns: 1 if @fname is readable and is a PKZIP file,
 3319  * else 0.
 3320  */
 3321 
 3322 int gretl_is_pkzip_file (const char *fname)
 3323 {
 3324     FILE *fp;
 3325     char test[3] = {0};
 3326     int ret = 0;
 3327 
 3328     fp = gretl_fopen(fname, "rb");
 3329     if (fp != NULL) {
 3330     if (fread(test, 1, 2, fp) == 2) {
 3331         if (!strcmp(test, "PK")) ret = 1;
 3332     }
 3333     fclose(fp);
 3334     }
 3335 
 3336     return ret;
 3337 }
 3338 
 3339 /**
 3340  * detect_filetype:
 3341  * @fname: the name of the file to test.
 3342  * @opt: OPT_P may be included to permit path-searching if @fname
 3343  * is not an absolute path; in that case the @fname argument
 3344  * may be modified, otherwise it will be left unchanged.
 3345  *
 3346  * Attempts to determine the type of a file to be opened in gretl:
 3347  * data file (of various formats), or command script. If OPT_P
 3348  * is given, the @fname argument must be an array of length
 3349  * at least %MAXLEN: a path may be prepended and in some cases
 3350  * an extension may be appended.
 3351  *
 3352  * Returns: integer code indicating the type of file.
 3353  */
 3354 
 3355 GretlFileType detect_filetype (char *fname, gretlopt opt)
 3356 {
 3357     const char *ext = get_filename_extension(fname);
 3358     GretlFileType ftype = GRETL_UNRECOGNIZED;
 3359 
 3360     if (ext != NULL) {
 3361     /* First try judging the type by extension */
 3362     if (!strcmp(ext, ".inp")) {
 3363         ftype = GRETL_SCRIPT;
 3364     } else if (!strcmp(ext, ".gretl")) {
 3365         if (gretl_is_pkzip_file(fname)) {
 3366         ftype = GRETL_SESSION;
 3367         } else {
 3368         ftype = GRETL_SCRIPT;
 3369         }
 3370     } else {
 3371         ftype = data_file_type_from_extension(ext);
 3372         if (ftype == GRETL_UNRECOGNIZED) {
 3373         /* check for database types */
 3374         if (!strcmp(ext, ".bin")) {
 3375             ftype = GRETL_NATIVE_DB;
 3376         } else if (!strcmp(ext, ".rat")) {
 3377             ftype = GRETL_RATS_DB;
 3378         } else if (!strcmp(ext, ".bn7")) {
 3379             ftype = GRETL_PCGIVE_DB;
 3380         }
 3381         }
 3382     }
 3383     if (ftype != GRETL_UNRECOGNIZED) {
 3384         /* We got a type from the extension, but can we find
 3385            the file "as is"? If so, we're done.
 3386         */
 3387         if (gretl_test_fopen(fname, "r") == 0) {
 3388         return ftype;
 3389         }
 3390     }
 3391     }
 3392 
 3393     if ((opt & OPT_P) && gretl_addpath(fname, 0) != NULL) {
 3394     ext = get_filename_extension(fname);
 3395     if (ext != NULL) {
 3396         /* check again for known data file types */
 3397         ftype = data_file_type_from_extension(ext);
 3398     }
 3399     }
 3400 
 3401     if (ftype == GRETL_UNRECOGNIZED) {
 3402     /* last gasp */
 3403     if (gretl_is_xml_file(fname)) {
 3404         ftype = GRETL_XML_DATA;
 3405     } else if (has_suffix(fname, ".dat") && is_jmulti_datafile(fname)) {
 3406         ftype = GRETL_JMULTI;
 3407     } else {
 3408         /* default to assuming plain text data */
 3409         ftype = GRETL_CSV;
 3410     }
 3411     }
 3412 
 3413     return ftype;
 3414 }
 3415 
 3416 /**
 3417  * check_atof:
 3418  * @numstr: string to check.
 3419  *
 3420  * Returns: 0 if @numstr is blank, or is a valid string representation
 3421  * of a floating point number, else 1.
 3422  */
 3423 
 3424 int check_atof (const char *numstr)
 3425 {
 3426     char *test;
 3427 
 3428     /* accept blank entries */
 3429     if (*numstr == '\0') return 0;
 3430 
 3431     errno = 0;
 3432 
 3433     strtod(numstr, &test);
 3434 
 3435     if (*test == '\0' && errno != ERANGE) return 0;
 3436 
 3437     if (!strcmp(numstr, test)) {
 3438     gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
 3439     return 1;
 3440     }
 3441 
 3442     if (*test != '\0') {
 3443     if (isprint(*test)) {
 3444         gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
 3445     } else {
 3446         gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
 3447     }
 3448     return 1;
 3449     }
 3450 
 3451     if (errno == ERANGE) {
 3452     gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
 3453     }
 3454 
 3455     return 1;
 3456 }
 3457 
 3458 /**
 3459  * check_atoi:
 3460  * @numstr: string to check.
 3461  *
 3462  * Returns: 0 if @numstr is blank, or is a valid string representation
 3463  * of an int, else 1.
 3464  */
 3465 
 3466 int check_atoi (const char *numstr)
 3467 {
 3468     long int val;
 3469     char *test;
 3470 
 3471     /* accept blank entries */
 3472     if (*numstr == '\0') return 0;
 3473 
 3474     errno = 0;
 3475 
 3476     val = strtol(numstr, &test, 10);
 3477 
 3478     if (*test == '\0' && errno != ERANGE) return 0;
 3479 
 3480     if (!strcmp(numstr, test)) {
 3481     gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
 3482     return 1;
 3483     }
 3484 
 3485     if (*test != '\0') {
 3486     if (isprint(*test)) {
 3487         gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
 3488     } else {
 3489         gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
 3490     }
 3491     return 1;
 3492     }
 3493 
 3494     if (errno == ERANGE || val <= INT_MIN || val >= INT_MAX) {
 3495     gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
 3496     }
 3497 
 3498     return 1;
 3499 }
 3500 
 3501 static int transpose_varname_used (const char *vname,
 3502                    DATASET *dinfo,
 3503                    int imax)
 3504 {
 3505     int i;
 3506 
 3507     for (i=0; i<imax; i++) {
 3508     if (!strcmp(vname, dinfo->varname[i])) {
 3509         return 1;
 3510     }
 3511     }
 3512 
 3513     return 0;
 3514 }
 3515 
 3516 /**
 3517  * transpose_data:
 3518  * @dset: pointer to dataset information struct.
 3519  *
 3520  * Attempts to transpose the current dataset, so that each
 3521  * variable becomes interpreted as an observation and each
 3522  * observation as a variable.
 3523  *
 3524  * Returns: 0 on success, non-zero error code on error.
 3525  */
 3526 
 3527 int transpose_data (DATASET *dset)
 3528 {
 3529     DATASET *tset;
 3530     int k = dset->n + 1;
 3531     int T = dset->v - 1;
 3532     int i, t;
 3533 
 3534     tset = create_new_dataset(k, T, 0);
 3535     if (tset == NULL) {
 3536     return E_ALLOC;
 3537     }
 3538 
 3539     for (i=1; i<dset->v; i++) {
 3540     for (t=0; t<dset->n; t++) {
 3541         tset->Z[t+1][i-1] = dset->Z[i][t];
 3542     }
 3543     }
 3544 
 3545     for (t=0; t<dset->n; t++) {
 3546     int k = t + 1;
 3547     char *targ = tset->varname[k];
 3548 
 3549     if (dset->S != NULL && dset->S[t][0] != '\0') {
 3550         int err;
 3551 
 3552         *targ = '\0';
 3553         strncat(targ, dset->S[t], VNAMELEN - 1);
 3554         gretl_charsub(targ, ' ', '_');
 3555         err = check_varname(targ);
 3556         if (err) {
 3557         sprintf(targ, "v%d", k);
 3558         gretl_error_clear();
 3559         } else if (transpose_varname_used(targ, tset, k)) {
 3560         sprintf(targ, "v%d", k);
 3561         }
 3562     } else {
 3563         sprintf(targ, "v%d", k);
 3564     }
 3565     }
 3566 
 3567     free_Z(dset);
 3568     dset->Z = tset->Z;
 3569 
 3570     clear_datainfo(dset, CLEAR_FULL);
 3571 
 3572     dset->v = k;
 3573     dset->n = T;
 3574     dset->t1 = 0;
 3575     dset->t2 = dset->n - 1;
 3576 
 3577     dset->varname = tset->varname;
 3578     dset->varinfo = tset->varinfo;
 3579 
 3580     dataset_obs_info_default(dset);
 3581 
 3582     free(tset);
 3583 
 3584     return 0;
 3585 }
 3586 
 3587 void dataset_set_regular_markers (DATASET *dset)
 3588 {
 3589     dset->markers = REGULAR_MARKERS;
 3590 }
 3591 
 3592 struct filetype_info {
 3593     GretlFileType type;
 3594     const char *src;
 3595 };
 3596 
 3597 /**
 3598  * dataset_add_import_info:
 3599  * @dset: pointer to dataset information struct.
 3600  * @fname: the name of a file from which data have been imported.
 3601  * @type: code representing the type of the file identified by
 3602  * @fname.
 3603  *
 3604  * On successful import of data from some "foreign" format,
 3605  * add a note to the "descrip" member of the new dataset
 3606  * saying where it came from and when.
 3607  */
 3608 
 3609 void dataset_add_import_info (DATASET *dset, const char *fname,
 3610                   GretlFileType type)
 3611 {
 3612     struct filetype_info ftypes[] = {
 3613     { GRETL_CSV,      "CSV" },
 3614     { GRETL_GNUMERIC, "Gnumeric" },
 3615     { GRETL_XLS,      "Excel" },
 3616     { GRETL_XLSX,     "Excel" },
 3617     { GRETL_ODS,      "Open Document" },
 3618     { GRETL_WF1,      "Eviews" },
 3619     { GRETL_DTA,      "Stata" },
 3620     { GRETL_SAV,      "SPSS" },
 3621     { GRETL_SAS,      "SAS" },
 3622     { GRETL_JMULTI,   "JMulTi" }
 3623     };
 3624     int i, nt = sizeof ftypes / sizeof ftypes[0];
 3625     const char *src = NULL;
 3626     gchar *note = NULL;
 3627     char tstr[48];
 3628 
 3629     for (i=0; i<nt; i++) {
 3630     if (type == ftypes[i].type) {
 3631         src = ftypes[i].src;
 3632         break;
 3633     }
 3634     }
 3635 
 3636     if (src == NULL) {
 3637     return;
 3638     }
 3639 
 3640     print_time(tstr);
 3641 
 3642     if (g_utf8_validate(fname, -1, NULL)) {
 3643     const char *p = strrchr(fname, SLASH);
 3644 
 3645     if (p != NULL) {
 3646         fname = p + 1;
 3647     }
 3648     note = g_strdup_printf(_("Data imported from %s file '%s', %s\n"),
 3649                    src, fname, tstr);
 3650     } else {
 3651     note = g_strdup_printf(_("Data imported from %s, %s\n"),
 3652                    src, tstr);
 3653     }
 3654 
 3655     if (note != NULL) {
 3656     if (dset->descrip == NULL) {
 3657         dset->descrip = gretl_strdup(note);
 3658     } else {
 3659         int dlen = strlen(dset->descrip);
 3660         int nlen = strlen(note);
 3661         char *tmp = realloc(dset->descrip, dlen + nlen + 5);
 3662 
 3663         if (tmp != NULL) {
 3664         dset->descrip = tmp;
 3665         strcat(dset->descrip, "\n\n");
 3666         strncat(dset->descrip, note, nlen);
 3667         }
 3668     }
 3669     g_free(note);
 3670     }
 3671 }
 3672 
 3673 static int is_weekend (int t, int pd, int sat0, int sun0)
 3674 {
 3675     int sat = 0, sun = 0;
 3676 
 3677     /* this is intended to identify weekend days for
 3678        both 7- and 6-day data */
 3679 
 3680     if (sat0 >= 0) {
 3681     sat = (t - sat0) % pd == 0;
 3682     }
 3683 
 3684     if (!sat && sun0 >= 0) {
 3685     sun = (t - sun0) % pd == 0;
 3686     }
 3687 
 3688     return sat || sun;
 3689 }
 3690 
 3691 /* Scan imported daily data for missing values, so as to
 3692    be able to offer the user some options.
 3693 
 3694    Return values:
 3695 
 3696    0 : no missing values
 3697    1 : all weekend data are missing (or possibly just all
 3698        Saturdays, or just all Sundays), but no weekday
 3699        data missing
 3700    2 : as with 1, but also some weekdays missing
 3701    3 : scattering of weekend and/or weekday data missing
 3702 */
 3703 
 3704 int analyse_daily_import (const DATASET *dset, PRN *prn)
 3705 {
 3706     int all_weekends_blank = 0;
 3707     int blank_weekends = 0;
 3708     int blank_weekdays = 0;
 3709     int n_weekdays = 0;
 3710     int n_weekend_days = 0;
 3711     int sat0 = -1, sun0 = -1;
 3712     int i, t, pd = dset->pd;
 3713     int all_missing, weekend;
 3714     int ret = 0;
 3715 
 3716     if (pd > 5) {
 3717     char datestr[OBSLEN];
 3718     int wkday;
 3719 
 3720     /* start by finding first Sat and/or Sun */
 3721     for (t=0; t<dset->n; t++) {
 3722         ntodate(datestr, t, dset);
 3723         wkday = weekday_from_date(datestr);
 3724         if (wkday == 6 && sat0 < 0) {
 3725         sat0 = t;
 3726         } else if (wkday == 0 && sun0 < 0) {
 3727         sun0 = t;
 3728         }
 3729         if (sat0 >= 0 && sun0 >= 0) {
 3730         break;
 3731         } else if (dset->pd == 6 && (sat0 >= 0 || sun0 >= 0)) {
 3732         break;
 3733         }
 3734     }
 3735     all_weekends_blank = 1; /* may be revised below */
 3736     } else {
 3737     /* there are no weekend days in 5-day data */
 3738     weekend = 0;
 3739     }
 3740 
 3741     for (t=0; t<dset->n; t++) {
 3742     if (pd > 5) {
 3743         weekend = is_weekend(t, pd, sat0, sun0);
 3744     }
 3745     all_missing = 1;
 3746     for (i=1; i<dset->v; i++) {
 3747         if (!na(dset->Z[i][t])) {
 3748         all_missing = 0;
 3749         break;
 3750         }
 3751     }
 3752     if (weekend) {
 3753         n_weekend_days++;
 3754         if (!all_missing) {
 3755         /* not all weekend data are missing */
 3756         all_weekends_blank = 0;
 3757         }
 3758     } else {
 3759         n_weekdays++;
 3760     }
 3761     if (all_missing) {
 3762         if (weekend) {
 3763         blank_weekends++;
 3764         } else {
 3765         blank_weekdays++;
 3766         }
 3767     }
 3768     }
 3769 
 3770     if (all_weekends_blank) {
 3771     double misspc = 100.0 * blank_weekdays / (double) n_weekdays;
 3772 
 3773     if (pd == 7) {
 3774         pputs(prn, "This dataset is on 7-day calendar, but weekends are blank.");
 3775     } else {
 3776         pprintf(prn, "This dataset is on 6-day calendar, but %s are blank.",
 3777             sat0 >= 0 ? "Sundays" : "Saturdays");
 3778     }
 3779     ret = 1;
 3780     if (misspc > 0.0) {
 3781         pputc(prn, '\n');
 3782         pputs(prn, "In addition, ");
 3783         if (misspc >= 0.01) {
 3784         pprintf(prn, "%.2f percent of weekday observations are missing.",
 3785             misspc);
 3786         } else {
 3787         pprintf(prn, "%g percent of weekday observations are missing.",
 3788             misspc);
 3789         }
 3790         if (misspc < 10.0) {
 3791         ret = 2;
 3792         }
 3793     }
 3794     } else if (blank_weekdays || blank_weekends) {
 3795     int ndays = n_weekdays + n_weekend_days;
 3796     int nmiss = blank_weekdays + blank_weekends;
 3797     double misspc = 100.0 * nmiss / (double) ndays;
 3798 
 3799     if (misspc >= 0.01) {
 3800         pprintf(prn, "%.2f percent of daily observations are missing.",
 3801             misspc);
 3802     } else {
 3803         pprintf(prn, "%g percent of daily observations are missing.",
 3804             misspc);
 3805     }
 3806     if (misspc < 10) {
 3807         ret = 3;
 3808     }
 3809     }
 3810 
 3811     return ret;
 3812 }