"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020b/lib/src/dataset.c" (2 Apr 2020, 116374 Bytes) of package /linux/misc/gretl-2020b.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "dataset.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2020a_vs_2020b.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 #include "libgretl.h"
   21 #include "gretl_func.h"
   22 #include "uservar.h"
   23 #include "gretl_string_table.h"
   24 #include "libset.h"
   25 #include "dbread.h"
   26 
   27 #define DDEBUG 0
   28 #define FULLDEBUG 0
   29 
   30 #define Z_COLS_BORROWED 2
   31 
   32 #define dset_zcols_borrowed(d) (d->auxiliary == Z_COLS_BORROWED)
   33 
   34 struct VARINFO_ {
   35     char *label;
   36     char display_name[MAXDISP];
   37     char parent[VNAMELEN];
   38     VarFlags flags;
   39     char compact_method;
   40     gint64 mtime;
   41     short transform;    /* note: command index of transform */
   42     short lag;
   43     short stack_level;
   44     short midas_period;
   45     char midas_freq;
   46     series_table *st;
   47 };
   48 
   49 static int pad_daily_data (DATASET *dset, int pd, PRN *prn);
   50 
   51 static int dataset_changed;
   52 
   53 /**
   54  * check_dataset_is_changed:
   55  *
   56  * Returns: 1 if the current dataset has been modified since
   57  * the last call to this function, or since the library was
   58  * initialized if this function has not yet been called;
   59  * otherwise 0.
   60  */
   61 
   62 int check_dataset_is_changed (void)
   63 {
   64     int ret = dataset_changed;
   65 
   66     dataset_changed = 0;
   67     return ret;
   68 }
   69 
   70 /**
   71  * set_dataset_is_changed:
   72  *
   73  * Sets the internal "dataset changed" flag to 1
   74  */
   75 
   76 void set_dataset_is_changed (void)
   77 {
   78     if (gretl_function_depth() == 0) {
   79     dataset_changed = 1;
   80     }
   81 }
   82 
   83 static void dataset_set_nobs (DATASET *dset, int n)
   84 {
   85     if (n != dset->n) {
   86     /* if the total number of observations in the dataset
   87        has changed, the current "matrix_mask", if present
   88        (see libset.c), will now be invalid
   89     */
   90     destroy_matrix_mask();
   91     dset->n = n;
   92     }
   93 }
   94 
   95 /**
   96  * free_Z:
   97  * @dset: dataset information.
   98  *
   99  * Does a deep free on the data matrix.
  100  */
  101 
  102 void free_Z (DATASET *dset)
  103 {
  104     if (dset != NULL && dset->Z != NULL) {
  105     int i, v = dset_zcols_borrowed(dset) ? 1 : dset->v;
  106 
  107 #if DDEBUG
  108     fprintf(stderr, "Freeing Z (%p): %d vars\n", (void *) dset->Z, v);
  109 #endif
  110     for (i=0; i<v; i++) {
  111         free(dset->Z[i]);
  112     }
  113     free(dset->Z);
  114     dset->Z = NULL;
  115     }
  116 }
  117 
  118 /**
  119  * dataset_destroy_obs_markers:
  120  * @dset: data information struct.
  121  *
  122  * Frees any allocated observation markers for @dset.
  123  */
  124 
  125 void dataset_destroy_obs_markers (DATASET *dset)
  126 {
  127     int i;
  128 
  129     if (dset->S != NULL) {
  130     for (i=0; i<dset->n; i++) {
  131        free(dset->S[i]);
  132     }
  133     free(dset->S);
  134     dset->S = NULL;
  135     dset->markers = NO_MARKERS;
  136     }
  137 }
  138 
  139 static void free_varinfo (DATASET *dset, int v)
  140 {
  141     if (dset->varinfo[v]->st != NULL) {
  142     series_table_destroy(dset->varinfo[v]->st);
  143     }
  144     if (dset->varinfo[v]->label != NULL) {
  145     free(dset->varinfo[v]->label);
  146     }
  147     free(dset->varinfo[v]);
  148 }
  149 
  150 /**
  151  * clear_datainfo:
  152  * @dset: data information struct.
  153  * @code: either %CLEAR_FULL or %CLEAR_SUBSAMPLE.
  154  *
  155  * Frees the allocated content of a data information struct;
  156  * note that @dset itself is not freed.
  157  */
  158 
  159 void clear_datainfo (DATASET *dset, int code)
  160 {
  161     int i;
  162 
  163     if (dset == NULL) return;
  164 
  165     if (dset->S != NULL) {
  166     dataset_destroy_obs_markers(dset);
  167     }
  168 
  169     if (dset->submask != NULL) {
  170     free_subsample_mask(dset->submask);
  171     dset->submask = NULL;
  172     }
  173 
  174     if (dset->restriction != NULL) {
  175     free(dset->restriction);
  176     dset->restriction = NULL;
  177     }
  178 
  179     if (dset->padmask != NULL) {
  180     free(dset->padmask);
  181     dset->padmask = NULL;
  182     }
  183 
  184     if (dset->pangrps != NULL) {
  185     free(dset->pangrps);
  186     dset->pangrps = NULL;
  187     }
  188 
  189     /* if this is not a sub-sample datainfo, free varnames, labels, etc. */
  190 
  191     if (code == CLEAR_FULL) {
  192     if (dset->varname != NULL) {
  193         for (i=0; i<dset->v; i++) {
  194         free(dset->varname[i]);
  195         }
  196         free(dset->varname);
  197         dset->varname = NULL;
  198     }
  199     if (dset->varinfo != NULL) {
  200         for (i=0; i<dset->v; i++) {
  201         free_varinfo(dset, i);
  202         }
  203         free(dset->varinfo);
  204         dset->varinfo = NULL;
  205     }
  206     if (dset->descrip != NULL) {
  207         free(dset->descrip);
  208         dset->descrip = NULL;
  209     }
  210 
  211     maybe_free_full_dataset(dset);
  212 
  213     dset->v = dset->n = 0;
  214     }
  215 }
  216 
  217 /**
  218  * destroy_dataset:
  219  * @dset: pointer to dataset.
  220  *
  221  * Frees all resources associated with @dset.
  222  */
  223 
  224 void destroy_dataset (DATASET *dset)
  225 {
  226     if (dset != NULL) {
  227     free_Z(dset);
  228     clear_datainfo(dset, CLEAR_FULL);
  229     free(dset);
  230     }
  231 }
  232 
  233 /**
  234  * copy_dataset_obs_info:
  235  * @targ: pointer to target dataset.
  236  * @src: pointer to source dataset.
  237  *
  238  * Sets the "date" or observations information in @targ to that
  239  * found in @src.
  240  */
  241 
  242 void copy_dataset_obs_info (DATASET *targ, const DATASET *src)
  243 {
  244     strcpy(targ->stobs, src->stobs);
  245     strcpy(targ->endobs, src->endobs);
  246     targ->sd0 = src->sd0;
  247     targ->pd = src->pd;
  248     targ->structure = src->structure;
  249 }
  250 
  251 /**
  252  * dataset_obs_info_default:
  253  * @dset: pointer to dataset.
  254  *
  255  * Sets the "date" or observations information in @dset to a
  256  * simple default of cross-sectional data, observations 1 to n,
  257  * where n is the %n element (number of observations) in @dset.
  258  */
  259 
  260 void dataset_obs_info_default (DATASET *dset)
  261 {
  262     strcpy(dset->stobs, "1");
  263     sprintf(dset->endobs, "%d", dset->n);
  264     dset->sd0 = 1.0;
  265     dset->pd = 1;
  266     dset->structure = CROSS_SECTION;
  267 }
  268 
  269 /**
  270  * dataset_allocate_obs_markers:
  271  * @dset: pointer to dataset
  272  *
  273  * Allocates space in @dset for strings indentifying the
  274  * observations and initializes all of the markers to empty
  275  * strings.  Note that These strings have a fixed maximum
  276  * length of #OBSLEN - 1.
  277  *
  278  * Returns: 0 on success, E_ALLOC on error.
  279  */
  280 
  281 int dataset_allocate_obs_markers (DATASET *dset)
  282 {
  283     char **S = NULL;
  284     int err = 0;
  285 
  286     if (dset->S == NULL) {
  287     /* not already allocated */
  288     S = strings_array_new_with_length(dset->n, OBSLEN);
  289     if (S == NULL) {
  290         err = E_ALLOC;
  291     } else {
  292         dset->S = S;
  293     }
  294     }
  295 
  296     if (dset->S != NULL) {
  297     dset->markers = REGULAR_MARKERS;
  298     }
  299 
  300     return err;
  301 }
  302 
  303 static void gretl_varinfo_init (VARINFO *vinfo)
  304 {
  305     vinfo->label = NULL;
  306     vinfo->display_name[0] = '\0';
  307     vinfo->parent[0] = '\0';
  308     vinfo->flags = 0;
  309     vinfo->transform = 0;
  310     vinfo->lag = 0;
  311     vinfo->midas_period = 0;
  312     vinfo->midas_freq = 0;
  313     vinfo->compact_method = COMPACT_NONE;
  314     vinfo->mtime = 0;
  315     vinfo->stack_level = gretl_function_depth();
  316     vinfo->st = NULL;
  317 }
  318 
  319 static void copy_label (char **targ, const char *src)
  320 {
  321     free(*targ);
  322     if (src == NULL) {
  323     *targ = NULL;
  324     } else {
  325     *targ = gretl_strdup(src);
  326     }
  327 }
  328 
  329 static int labels_differ (const char *s1, const char *s2)
  330 {
  331     if ((s1 == NULL && s2 != NULL) || (s1 != NULL && s2 == NULL)) {
  332     return 1;
  333     } else if (s1 != NULL && s2 != NULL) {
  334     return strcmp(s1, s2) != 0;
  335     } else {
  336     return 0;
  337     }
  338 }
  339 
  340 /**
  341  * copy_varinfo:
  342  * @targ: target to which to copy.
  343  * @src: source to copy from.
  344  *
  345  * Copies all relevant information from @src to @targ.
  346  */
  347 
  348 void copy_varinfo (VARINFO *targ, const VARINFO *src)
  349 {
  350     if (src == NULL || targ == NULL) {
  351     return;
  352     }
  353     copy_label(&targ->label, src->label);
  354     strcpy(targ->display_name, src->display_name);
  355     strcpy(targ->parent, src->parent);
  356     targ->flags = src->flags;
  357     targ->transform = src->transform;
  358     targ->lag = src->lag;
  359     targ->midas_period = src->midas_period;
  360     targ->midas_freq = src->midas_freq;
  361     targ->compact_method = src->compact_method;
  362     targ->stack_level = src->stack_level;
  363     if (src->st != NULL) {
  364     targ->st = series_table_copy(src->st);
  365     }
  366 }
  367 
  368 /* For use in the context of returning from a sub-sampled
  369    dataset to the full one: trim off series names and
  370    "varinfo" beyond the index @nv, which gives the number
  371    of series in the full dataset.
  372 */
  373 
  374 int shrink_varinfo (DATASET *dset, int nv)
  375 {
  376     char **vnames;
  377     VARINFO **vi;
  378     int i, err = 0;
  379 
  380     if (nv > dset->v) {
  381     return E_DATA;
  382     } else if (nv == dset->v) {
  383     return 0;
  384     }
  385 
  386     for (i=nv; i<dset->v; i++) {
  387     free(dset->varname[i]);
  388     free(dset->varinfo[i]);
  389     }
  390 
  391     vnames = realloc(dset->varname, nv * sizeof *vnames);
  392     if (vnames == NULL) {
  393     err = E_ALLOC;
  394     } else {
  395     dset->varname = vnames;
  396     }
  397 
  398     vi = realloc(dset->varinfo, nv * sizeof *vi);
  399     if (vi == NULL) {
  400     err = E_ALLOC;
  401     } else {
  402     dset->varinfo = vi;
  403     }
  404 
  405     return err;
  406 }
  407 
  408 /**
  409  * dataset_allocate_varnames:
  410  * @dset: pointer to dataset.
  411  *
  412  * Given a blank @dset, which should have been obtained using
  413  * datainfo_new(), allocate space for the names of variables.
  414  * The @v member of @dset (representing the number of variables,
  415  * including the automatically added constant at position 0) must be
  416  * set before calling this function.
  417  *
  418  * Returns: 0 on sucess, E_ALLOC on failure.
  419  */
  420 
  421 int dataset_allocate_varnames (DATASET *dset)
  422 {
  423     int i, j, v = dset->v;
  424     int err = 0;
  425 
  426     dset->varname = strings_array_new_with_length(v, VNAMELEN);
  427     if (dset->varname == NULL) {
  428     return E_ALLOC;
  429     }
  430 
  431     dset->varinfo = malloc(v * sizeof *dset->varinfo);
  432     if (dset->varinfo == NULL) {
  433     free(dset->varname);
  434     return E_ALLOC;
  435     }
  436 
  437     for (i=0; i<v; i++) {
  438     dset->varinfo[i] = malloc(sizeof **dset->varinfo);
  439     if (dset->varinfo[i] == NULL) {
  440         for (j=0; j<i; j++) {
  441         free(dset->varinfo[j]);
  442         }
  443         free(dset->varinfo);
  444         dset->varinfo = NULL;
  445         err = E_ALLOC;
  446         break;
  447     } else {
  448         gretl_varinfo_init(dset->varinfo[i]);
  449     }
  450     }
  451 
  452     if (!err) {
  453     strcpy(dset->varname[0], "const");
  454     series_set_label(dset, 0, _("auto-generated constant"));
  455     }
  456 
  457     return err;
  458 }
  459 
  460 /**
  461  * datainfo_init:
  462  * @dset: pointer to DATASET struct.
  463  *
  464  * Zeros all members of @dset and sets it as a plain cross-section.
  465  * Designed for use with a DATASET structure that has not been
  466  * obtained via datainfo_new().
  467  */
  468 
  469 void datainfo_init (DATASET *dset)
  470 {
  471     dset->v = 0;
  472     dset->n = 0;
  473     dset->pd = 1;
  474     dset->structure = CROSS_SECTION;
  475     dset->sd0 = 1.0;
  476     dset->t1 = 0;
  477     dset->t2 = 0;
  478     dset->stobs[0] = '\0';
  479     dset->endobs[0] = '\0';
  480 
  481     dset->Z = NULL;
  482     dset->varname = NULL;
  483     dset->varinfo = NULL;
  484 
  485     dset->markers = NO_MARKERS;
  486     dset->modflag = 0;
  487 
  488     dset->S = NULL;
  489     dset->descrip = NULL;
  490     dset->submask = NULL;
  491     dset->restriction = NULL;
  492     dset->padmask = NULL;
  493     dset->pangrps = NULL;
  494     dset->panel_pd = 0;
  495     dset->panel_sd0 = 0;
  496 
  497     dset->auxiliary = 0;
  498     dset->rseed = 0;
  499 }
  500 
  501 /**
  502  * datainfo_new:
  503  *
  504  * Creates a new data information struct pointer from scratch,
  505  * properly initialized as empty (no variables, no observations).
  506  *
  507  * Returns: pointer to data information struct, or NULL on error.
  508  */
  509 
  510 DATASET *datainfo_new (void)
  511 {
  512     DATASET *dset = malloc(sizeof *dset);
  513 
  514     if (dset != NULL) {
  515     datainfo_init(dset);
  516     }
  517 
  518     return dset;
  519 }
  520 
  521 static DATASET *real_create_new_dataset (int nvar, int nobs,
  522                      gretlopt opt)
  523 {
  524     DATASET *dset = datainfo_new();
  525 
  526     if (dset == NULL) return NULL;
  527 
  528     dset->v = nvar;
  529     dset->n = nobs;
  530     dset->Z = NULL;
  531 
  532     if (start_new_Z(dset, opt)) {
  533     free(dset);
  534     return NULL;
  535     }
  536 
  537     if (opt & OPT_M) {
  538     if (dataset_allocate_obs_markers(dset)) {
  539         free_datainfo(dset);
  540         return NULL;
  541     }
  542     }
  543 
  544     dataset_obs_info_default(dset);
  545 
  546     return dset;
  547 }
  548 
  549 /**
  550  * create_new_dataset:
  551  * @nvar: number of variables.
  552  * @nobs: number of observations per variable.
  553  * @markers: 1 if space should be allocated for "case markers" for
  554  * the observations, 0 otherwise.
  555  *
  556  * Allocates space in the dataset to hold the specified number
  557  * of variables and observations.
  558  *
  559  * Returns: pointer to dataset struct, or NULL on error.
  560  */
  561 
  562 DATASET *create_new_dataset (int nvar, int nobs, int markers)
  563 {
  564     gretlopt opt = markers ? OPT_M : OPT_NONE;
  565 
  566     return real_create_new_dataset(nvar, nobs, opt);
  567 }
  568 
  569 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt)
  570 {
  571     DATASET *dset = real_create_new_dataset(nvar, nobs, opt);
  572 
  573     if (dset != NULL) {
  574     if (opt & OPT_B) {
  575         dset->auxiliary = Z_COLS_BORROWED;
  576     } else {
  577         dset->auxiliary = 1;
  578     }
  579     }
  580 
  581     return dset;
  582 }
  583 
  584 static double **make_borrowed_Z (int v, int n)
  585 {
  586     double **Z = malloc(v * sizeof *Z);
  587 
  588     if (Z != NULL) {
  589     int i;
  590 
  591     for (i=0; i<v; i++) {
  592         Z[i] = NULL;
  593     }
  594 
  595     Z[0] = malloc(n * sizeof **Z);
  596 
  597     if (Z[0] == NULL) {
  598         free(Z);
  599         Z = NULL;
  600     } else {
  601         for (i=0; i<n; i++) {
  602         Z[0][i] = 1.0;
  603         }
  604     }
  605     }
  606 
  607     return Z;
  608 }
  609 
  610 /**
  611  * allocate_Z:
  612  * @dset: pointer to dataset.
  613  * @opt: may include OPT_B to indicate that the data columns
  614  * will be "borrowed".
  615  *
  616  * Allocates the two-dimensional data array Z,
  617  * based on the v (number of variables) and n (number of
  618  * observations) members of @dset.  The variable at
  619  * position 0 is initialized to all 1s; other variables
  620  * are initialized to #NADBL (unless OPT_B is given).
  621  *
  622  * Returns: 0 on success, E_ALLOC on error.
  623  */
  624 
  625 int allocate_Z (DATASET *dset, gretlopt opt)
  626 {
  627     int i, t;
  628     int err = 0;
  629 
  630     if (dset->Z != NULL) {
  631     fprintf(stderr, "*** error: allocate_Z called with non-NULL Z\n");
  632     }
  633 
  634     if (opt & OPT_B) {
  635     dset->Z = make_borrowed_Z(dset->v, dset->n);
  636     } else {
  637     dset->Z = doubles_array_new(dset->v, dset->n);
  638     }
  639 
  640     if (dset->Z == NULL) {
  641     err = E_ALLOC;
  642     } else if (!(opt & OPT_B)) {
  643     for (i=0; i<dset->v; i++) {
  644         for (t=0; t<dset->n; t++) {
  645         dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
  646         }
  647     }
  648     }
  649 
  650     return err;
  651 }
  652 
  653 /**
  654  * start_new_Z:
  655  * @dset: pointer to dataset.
  656  * @opt: if includes OPT_R we're sub-sampling from a full data set;
  657  * if includes OPT_P, do not null out dset->S and markers.
  658  *
  659  * Initializes the data array within @dset (adding the constant in
  660  * position 0).
  661  *
  662  * Returns: 0 on successful completion, non-zero on error.
  663  */
  664 
  665 int start_new_Z (DATASET *dset, gretlopt opt)
  666 {
  667     if (allocate_Z(dset, opt)) {
  668     return E_ALLOC;
  669     }
  670 
  671     dset->t1 = 0;
  672     dset->t2 = dset->n - 1;
  673 
  674     if (opt & OPT_R) {
  675     /* sub-sampling */
  676     dset->varname = NULL;
  677     dset->varinfo = NULL;
  678     } else if (dataset_allocate_varnames(dset)) {
  679     free_Z(dset);
  680     dset->Z = NULL;
  681     return E_ALLOC;
  682     }
  683 
  684     if (!(opt & OPT_P)) {
  685     dset->S = NULL;
  686     dset->markers = NO_MARKERS;
  687     }
  688 
  689     dset->descrip = NULL;
  690     dset->submask = NULL;
  691     dset->restriction = NULL;
  692     dset->padmask = NULL;
  693     if (!(opt & OPT_R)) {
  694     dset->pangrps = NULL;
  695     }
  696 
  697     return 0;
  698 }
  699 
  700 static int reallocate_markers (DATASET *dset, int n)
  701 {
  702     char **S;
  703     int t;
  704 
  705     S = realloc(dset->S, n * sizeof *S);
  706     if (S == NULL) {
  707     return 1;
  708     }
  709 
  710     for (t=dset->n; t<n; t++) {
  711     S[t] = malloc(OBSLEN);
  712     if (S[t] == NULL) {
  713         int j;
  714 
  715         for (j=dset->n; j<t; j++) {
  716         free(S[j]);
  717         }
  718         free(S);
  719         return 1;
  720     }
  721     S[t][0] = '\0';
  722     }
  723 
  724     dset->S = S;
  725 
  726     return 0;
  727 }
  728 
  729 /* Allow for the possibility of centered seasonal dummies: usually
  730    xon = 1 and xoff = 0, but in the centered case xon = 1 - 1/pd
  731    and xoff = -1/pd.
  732 */
  733 
  734 static int get_xon_xoff (const double *x, int n, int pd, double *xon, double *xoff)
  735 {
  736     double cfac = 1.0 / pd;
  737     double xc = 1.0 - cfac, yc = -cfac;
  738     double x0 = 999, y0 = 999;
  739     int t, ret = 1;
  740 
  741     for (t=0; t<n && ret; t++) {
  742     if (x[t] == 1.0) {
  743         if (x0 == 999) x0 = 1.0;
  744         else if (x[t] != x0) ret = 0;
  745     } else if (x[t] == 0.0) {
  746         if (y0 == 999) y0 = 0.0;
  747         else if (x[t] != y0) ret = 0;
  748     } else if (x[t] == xc) {
  749         if (x0 == 999) x0 = xc;
  750         else if (x[t] != x0) ret = 0;
  751     } else if (x[t] == yc) {
  752         if (y0 == 999) y0 = yc;
  753         else if (x[t] != y0) ret = 0;
  754     } else {
  755         ret = 0;
  756     }
  757     }
  758 
  759     if (ret) {
  760     *xon = x0;
  761     *xoff = y0;
  762     }
  763 
  764     return ret;
  765 }
  766 
  767 static int real_periodic_dummy (const double *x, int n,
  768                 int *pd, int *offset,
  769                 double *pxon, double *pxoff)
  770 {
  771     double xon = 1.0, xoff = 0.0;
  772     int onbak = 0;
  773     int gap = 0;
  774     int trail = 0;
  775     int t, m = n - 1, ret = 1;
  776 
  777     if (!get_xon_xoff(x, n, *pd, &xon, &xoff)) {
  778     return 0;
  779     }
  780 
  781     *pd = -1;
  782     *offset = -1;
  783     trail = 0;
  784 
  785     /* find number of trailing "off" values */
  786     for (t=n-1; t>0; t--) {
  787     if (x[t] == xoff) {
  788         trail++;
  789     } else {
  790         if (x[t] == xon) {
  791         m = t;
  792         } else {
  793         ret = 0;
  794         }
  795         break;
  796     }
  797     }
  798 
  799     /* check for dummyhood and periodicity */
  800     for (t=0; t<=m && ret; t++) {
  801     if (x[t] == xoff) {
  802         onbak = 0;
  803         gap++;
  804     } else if (x[t] == xon) {
  805         if (onbak) {
  806         ret = 0;
  807         } else if (*offset < 0) {
  808         *offset = gap;
  809         } else if (*pd < 0) {
  810         *pd = gap + 1;
  811         if (*pd < *offset + 1) {
  812             ret = 0;
  813         }
  814         } else if (gap != *pd - 1) {
  815         ret = 0;
  816         } else if (gap < trail) {
  817         ret = 0;
  818         }
  819         gap = 0;
  820         onbak = 1;
  821     } else {
  822         ret = 0;
  823         break;
  824     }
  825     }
  826 
  827     if (ret && pxon != NULL && pxoff != NULL) {
  828     *pxon = xon;
  829     *pxoff = xoff;
  830     }
  831 
  832     return ret;
  833 }
  834 
  835 /**
  836  * is_periodic_dummy:
  837  * @x: array to examine.
  838  * @dset: pointer to dataset.
  839  *
  840  * Returns: 1 if @x is a periodic dummy variable,
  841  * 0 otherwise.
  842  */
  843 
  844 int is_periodic_dummy (const double *x, const DATASET *dset)
  845 {
  846     int offset, pd = dset->pd;
  847 
  848     return real_periodic_dummy(x, dset->n, &pd, &offset, NULL, NULL);
  849 }
  850 
  851 static int is_linear_trend (const double *x, int n)
  852 {
  853     int t, ret = 1;
  854 
  855     for (t=1; t<n; t++) {
  856     if (x[t] != x[t-1] + 1.0) {
  857         ret = 0;
  858         break;
  859     }
  860     }
  861 
  862     return ret;
  863 }
  864 
  865 static int is_quadratic_trend (const double *x, int n)
  866 {
  867     double t2;
  868     int t, ret = 1;
  869 
  870     for (t=0; t<n; t++) {
  871     t2 = (t + 1) * (t + 1);
  872     if (x[t] != t2) {
  873         ret = 0;
  874         break;
  875     }
  876     }
  877 
  878     return ret;
  879 }
  880 
  881 /**
  882  * is_trend_variable:
  883  * @x: array to examine.
  884  * @n: number of elements in array.
  885  *
  886  * Returns: 1 if @x is a simple linear trend variable, with each
  887  * observation equal to the preceding observation plus 1, or
  888  * if @x is a quadratic trend starting at 1 for the first
  889  * observation in the data set, and 0 otherwise.
  890  */
  891 
  892 int is_trend_variable (const double *x, int n)
  893 {
  894     int ret = 0;
  895 
  896     if (is_linear_trend(x, n)) {
  897     ret = 1;
  898     } else if (is_quadratic_trend(x, n)) {
  899     ret = 1;
  900     }
  901 
  902     return ret;
  903 }
  904 
  905 static void maybe_extend_trends (DATASET *dset, int oldn)
  906 {
  907     int i, t;
  908 
  909     for (i=1; i<dset->v; i++) {
  910     if (is_linear_trend(dset->Z[i], oldn)) {
  911         for (t=oldn; t<dset->n; t++) {
  912         dset->Z[i][t] = dset->Z[i][t-1] + 1.0;
  913         }
  914     } else if (is_quadratic_trend(dset->Z[i], oldn)) {
  915         for (t=oldn; t<dset->n; t++) {
  916         dset->Z[i][t] = (t + 1) * (t + 1);
  917         }
  918     }
  919     }
  920 }
  921 
  922 static void maybe_extend_dummies (DATASET *dset, int oldn)
  923 {
  924     int pd = dset->pd;
  925     double xon = 1.0, xoff = 0.0;
  926     int offset;
  927     int i, t;
  928 
  929     for (i=1; i<dset->v; i++) {
  930     if (real_periodic_dummy(dset->Z[i], oldn, &pd, &offset, &xon, &xoff)) {
  931         for (t=oldn; t<dset->n; t++) {
  932         dset->Z[i][t] = ((t - offset) % pd)? xoff : xon;
  933         }
  934     }
  935     }
  936 }
  937 
  938 /* regular, not panel-time, version */
  939 
  940 static int real_dataset_add_observations (DATASET *dset, int n,
  941                       gretlopt opt)
  942 {
  943     double *x;
  944     int oldn = dset->n;
  945     int i, t, bign;
  946     int err = 0;
  947 
  948     if (dset_zcols_borrowed(dset)) {
  949     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
  950     return E_DATA;
  951     }
  952 
  953     if (n <= 0) {
  954     return 0;
  955     }
  956 
  957     if (dataset_is_panel(dset) && n % dset->pd != 0) {
  958     return E_PDWRONG;
  959     }
  960 
  961     bign = oldn + n;
  962 
  963     for (i=0; i<dset->v; i++) {
  964     x = realloc(dset->Z[i], bign * sizeof *x);
  965     if (x == NULL) {
  966         return E_ALLOC;
  967     }
  968     dset->Z[i] = x;
  969     for (t=oldn; t<bign; t++) {
  970         dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
  971     }
  972     }
  973 
  974     if (dataset_has_markers(dset)) {
  975     if (opt & OPT_D) {
  976         dataset_destroy_obs_markers(dset);
  977     } else {
  978         if (reallocate_markers(dset, bign)) {
  979         return E_ALLOC;
  980         }
  981         for (t=oldn; t<bign; t++) {
  982         sprintf(dset->S[t], "%d", t + 1);
  983         }
  984     }
  985     }
  986 
  987     if (dset->t2 == dset->n - 1) {
  988     dset->t2 = bign - 1;
  989     }
  990 
  991     dataset_set_nobs(dset, bign);
  992 
  993     if (opt & OPT_A) {
  994     maybe_extend_trends(dset, oldn);
  995     maybe_extend_dummies(dset, oldn);
  996     }
  997 
  998     /* does daily data need special handling? */
  999     ntodate(dset->endobs, bign - 1, dset);
 1000 
 1001     return err;
 1002 }
 1003 
 1004 static int panel_dataset_extend_time (DATASET *dset, int n)
 1005 {
 1006     double *utmp, *vtmp;
 1007     char **S = NULL;
 1008     int newT, oldT = dset->pd;
 1009     int oldn = dset->n;
 1010     int n_units;
 1011     int i, j, s, t, bign;
 1012     size_t usz, vsz;
 1013     int err = 0;
 1014 
 1015     if (!dataset_is_panel(dset)) {
 1016     return E_PDWRONG;
 1017     }
 1018 
 1019     if (dset_zcols_borrowed(dset)) {
 1020     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1021     return E_DATA;
 1022     }
 1023 
 1024     if (n <= 0) {
 1025     return 0;
 1026     }
 1027 
 1028     n_units = oldn / oldT;
 1029     newT = oldT + n;
 1030     bign = n_units * newT;
 1031 
 1032     usz = newT * sizeof *utmp;
 1033     vsz = bign * sizeof *vtmp;
 1034 
 1035     utmp = malloc(usz);
 1036     if (utmp == NULL) {
 1037     return E_ALLOC;
 1038     }
 1039 
 1040     if (dataset_has_markers(dset)) {
 1041     S = strings_array_new_with_length(bign, OBSLEN);
 1042     if (S == NULL) {
 1043         free(utmp);
 1044         return E_ALLOC;
 1045     }
 1046     }
 1047 
 1048     for (i=0; i<dset->v; i++) {
 1049     int uconst = 1, utrend = 1, dtrend = 1;
 1050     double xbak = NADBL;
 1051     guint32 dt = 0, dbak = 0;
 1052     int ed_err;
 1053 
 1054     vtmp = malloc(vsz);
 1055     if (vtmp == NULL) {
 1056         err = E_ALLOC;
 1057         goto bailout;
 1058     }
 1059 
 1060     s = 0;
 1061     for (j=0; j<n_units; j++) {
 1062         for (t=0; t<oldT; t++) {
 1063         utmp[t] = dset->Z[i][s++];
 1064         if (dtrend) {
 1065             dt = epoch_day_from_ymd_basic(utmp[t]);
 1066         }
 1067         if (t == 0) {
 1068             xbak = utmp[t];
 1069             dbak = dt;
 1070         } else {
 1071             if (uconst && (utmp[t] != xbak)) {
 1072             uconst = 0;
 1073             }
 1074             if (utrend && (utmp[t] != xbak + 1)) {
 1075             utrend = 0;
 1076             }
 1077             if (dtrend && (dt != dbak + 1)) {
 1078             dtrend = 0;
 1079             }
 1080         }
 1081         xbak = utmp[t];
 1082         dbak = dt;
 1083         }
 1084         for (t=oldT; t<newT; t++) {
 1085         if (i == 0) {
 1086             utmp[t] = 1.0;
 1087         } else if (uconst) {
 1088             utmp[t] = utmp[t-1];
 1089         } else if (utrend) {
 1090             utmp[t] = utmp[t-1] + 1;
 1091         } else if (dtrend) {
 1092             dt = epoch_day_from_ymd_basic(utmp[t-1]);
 1093             utmp[t] = ymd_basic_from_epoch_day(dt+1, 0, &ed_err);
 1094         } else {
 1095             utmp[t] = NADBL;
 1096         }
 1097         }
 1098         memcpy(vtmp + j*newT, utmp, usz);
 1099     }
 1100     free(dset->Z[i]);
 1101     dset->Z[i] = vtmp;
 1102     }
 1103 
 1104     if (S != NULL) {
 1105     int k = 0;
 1106 
 1107     s = 0;
 1108     for (j=0; j<n_units; j++) {
 1109         for (t=0; t<newT; t++) {
 1110         if (t < oldT) {
 1111             strcpy(S[k], dset->S[s++]);
 1112         } else {
 1113             sprintf(S[k], "%d:%d", j+1, t+1);
 1114         }
 1115         k++;
 1116         }
 1117     }
 1118     strings_array_free(dset->S, oldn);
 1119     dset->S = S;
 1120     S = NULL;
 1121     }
 1122 
 1123     if (dset->t2 == dset->n - 1) {
 1124     dset->t2 = bign - 1;
 1125     }
 1126 
 1127     dataset_set_nobs(dset, bign);
 1128     dset->pd = newT;
 1129     ntodate(dset->endobs, bign - 1, dset);
 1130 
 1131  bailout:
 1132 
 1133     free(utmp);
 1134     if (S != NULL) {
 1135     strings_array_free(S, bign);
 1136     }
 1137 
 1138     return err;
 1139 }
 1140 
 1141 /**
 1142  * dataset_add_observations:
 1143  * @dset: pointer to dataset.
 1144  * @n: number of observations to add.
 1145  * @opt: use OPT_A to attempt to recognize and
 1146  * automatically extend simple deterministic variables such
 1147  * as a time trend and periodic dummy variables;
 1148  * use OPT_D to drop any observation markers rather than
 1149  * expanding the set of markers and padding it out with
 1150  * dummy values; use OPT_T to extend in the time dimension
 1151  * in the case of panel data.
 1152  *
 1153  * Extends all series in the dataset by the specified number of
 1154  * extra observations.  The added values are initialized to
 1155  * the missing value code, #NADBL, with the exception of
 1156  * simple deterministic variables when OPT_A is given.
 1157  *
 1158  * Returns: 0 on success, non-zero code on error.
 1159  */
 1160 
 1161 int dataset_add_observations (DATASET *dset, int n, gretlopt opt)
 1162 {
 1163     if (opt & OPT_T) {
 1164     return panel_dataset_extend_time(dset, n);
 1165     } else {
 1166     return real_dataset_add_observations(dset, n, opt);
 1167     }
 1168 }
 1169 
 1170 static int real_insert_observation (int pos, DATASET *dset)
 1171 {
 1172     double *x;
 1173     int n = dset->n + 1;
 1174     int i, t;
 1175     int err = 0;
 1176 
 1177     for (i=0; i<dset->v; i++) {
 1178     x = realloc(dset->Z[i], n * sizeof *x);
 1179     if (x == NULL) {
 1180         return E_ALLOC;
 1181     }
 1182     dset->Z[i] = x;
 1183     for (t=dset->n; t>pos; t--) {
 1184         dset->Z[i][t] = dset->Z[i][t-1];
 1185     }
 1186     dset->Z[i][pos] = (i == 0)? 1.0 : NADBL;
 1187     }
 1188 
 1189     if (dataset_has_markers(dset)) {
 1190     if (reallocate_markers(dset, n)) {
 1191         return E_ALLOC;
 1192     }
 1193     for (t=dset->n; t>pos; t--) {
 1194         strcpy(dset->S[t], dset->S[t-1]);
 1195     }
 1196     sprintf(dset->S[pos], "%d", pos + 1);
 1197     }
 1198 
 1199     if (dset->t2 == dset->n - 1) {
 1200     dset->t2 = n - 1;
 1201     }
 1202 
 1203     dataset_set_nobs(dset, n);
 1204     ntodate(dset->endobs, n - 1, dset);
 1205 
 1206     return err;
 1207 }
 1208 
 1209 /**
 1210  * dataset_drop_observations:
 1211  * @dset: pointer to dataset.
 1212  * @n: number of observations to drop.
 1213  *
 1214  * Deletes @n observations from the end of each series in the
 1215  * dataset.
 1216  *
 1217  * Returns: 0 on success, non-zero code on error.
 1218  */
 1219 
 1220 int dataset_drop_observations (DATASET *dset, int n)
 1221 {
 1222     double *x;
 1223     int i, newn;
 1224 
 1225     if (dset_zcols_borrowed(dset)) {
 1226     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1227     return E_DATA;
 1228     }
 1229 
 1230     if (n <= 0) {
 1231     return 0;
 1232     }
 1233 
 1234     if (dataset_is_panel(dset) && n % dset->pd != 0) {
 1235     return E_PDWRONG;
 1236     }
 1237 
 1238     newn = dset->n - n;
 1239 
 1240     if (newn == 0) {
 1241     free_Z(dset);
 1242     clear_datainfo(dset, CLEAR_FULL);
 1243     return 0;
 1244     }
 1245 
 1246     for (i=0; i<dset->v; i++) {
 1247     x = realloc(dset->Z[i], newn * sizeof *x);
 1248     if (x == NULL) {
 1249         return E_ALLOC;
 1250     }
 1251     dset->Z[i] = x;
 1252     }
 1253 
 1254     if (dataset_has_markers(dset)) {
 1255     if (reallocate_markers(dset, newn)) {
 1256         return E_ALLOC;
 1257     }
 1258     }
 1259 
 1260     if (dset->t2 > newn - 1) {
 1261     dset->t2 = newn - 1;
 1262     }
 1263 
 1264     dataset_set_nobs(dset, newn);
 1265 
 1266     /* does daily data need special handling? */
 1267     ntodate(dset->endobs, newn - 1, dset);
 1268 
 1269     return 0;
 1270 }
 1271 
 1272 /**
 1273  * dataset_shrink_obs_range:
 1274  * @dset: pointer to dataset.
 1275  *
 1276  * Truncates the range of observations in the dataset, based on
 1277  * the current values of the t1 and t2 members of @dset.
 1278  *
 1279  * Returns: 0 on success, non-zero code on error.
 1280  */
 1281 
 1282 int dataset_shrink_obs_range (DATASET *dset)
 1283 {
 1284     int offset = dset->t1;
 1285     int newn = dset->t2 - dset->t1 + 1;
 1286     int tail = dset->n - newn;
 1287     int err = 0;
 1288 
 1289     if (dset_zcols_borrowed(dset)) {
 1290     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1291     return E_DATA;
 1292     }
 1293 
 1294     if (offset > 0) {
 1295     /* If the revised dataset starts at an offset into
 1296        the original, shift each series back to the start of
 1297        its Z[i] array.
 1298     */
 1299     int i, mvsize;
 1300 
 1301     mvsize = newn * sizeof **dset->Z;
 1302     for (i=0; i<dset->v; i++) {
 1303         memmove(dset->Z[i], dset->Z[i] + offset, mvsize);
 1304     }
 1305 
 1306     if (dataset_has_markers(dset)) {
 1307         for (i=0; i<offset; i++) {
 1308         free(dset->S[i]);
 1309         }
 1310         mvsize = newn * sizeof *dset->S;
 1311         memmove(dset->S, dset->S + offset, mvsize);
 1312     }
 1313 
 1314     if (dset->structure == CROSS_SECTION) {
 1315         ntodate(dset->stobs, 0, dset);
 1316     } else {
 1317         /* FIXME panel? */
 1318         ntodate(dset->stobs, dset->t1, dset);
 1319         dset->sd0 = get_date_x(dset->pd, dset->stobs);
 1320     }
 1321 
 1322     dset->t1 = 0;
 1323     }
 1324 
 1325     err = dataset_drop_observations(dset, tail);
 1326 
 1327     return err;
 1328 }
 1329 
 1330 static int
 1331 dataset_expand_varinfo (int v0, int newvars, DATASET *dset)
 1332 {
 1333     char **varname = NULL;
 1334     VARINFO **varinfo = NULL;
 1335     int bigv = v0 + newvars;
 1336     int i, v, err = 0;
 1337 
 1338     varname = realloc(dset->varname, bigv * sizeof *varname);
 1339     if (varname == NULL) {
 1340     err = E_ALLOC;
 1341     } else {
 1342     dset->varname = varname;
 1343     }
 1344 
 1345     for (i=0; i<newvars && !err; i++) {
 1346     v = v0 + i;
 1347     dset->varname[v] = malloc(VNAMELEN);
 1348     if (dset->varname[v] == NULL) {
 1349         err = E_ALLOC;
 1350     } else {
 1351         dset->varname[v][0] = '\0';
 1352     }
 1353     }
 1354 
 1355     if (!err && dset->varinfo != NULL) {
 1356     varinfo = realloc(dset->varinfo, bigv * sizeof *varinfo);
 1357     if (varinfo == NULL) {
 1358         err = E_ALLOC;
 1359     } else {
 1360         dset->varinfo = varinfo;
 1361     }
 1362     for (i=0; i<newvars && !err; i++) {
 1363         v = v0 + i;
 1364         dset->varinfo[v] = malloc(sizeof **varinfo);
 1365         if (dset->varinfo[v] == NULL) {
 1366         err = E_ALLOC;
 1367         } else {
 1368         gretl_varinfo_init(dset->varinfo[v]);
 1369         }
 1370     }
 1371     }
 1372 
 1373     return err;
 1374 }
 1375 
 1376 /* note: values of series newly added here are left uninitialized:
 1377    that is the responsibility of the caller
 1378 */
 1379 
 1380 static int real_add_series (int newvars, double *x,
 1381                 DATASET *dset)
 1382 {
 1383     double **newZ;
 1384     int v0 = dset->v;
 1385     int i, err = 0;
 1386 
 1387     if (newvars == 0) {
 1388     /* no-op */
 1389     return 0;
 1390     }
 1391 
 1392     newZ = realloc(dset->Z, (v0 + newvars) * sizeof *newZ);
 1393 
 1394 #if DDEBUG
 1395     fprintf(stderr, "real_add_series: add %d vars, Z = %p\n",
 1396         newvars, (void *) newZ);
 1397 #endif
 1398 
 1399     if (newZ == NULL) {
 1400     err = E_ALLOC;
 1401     } else {
 1402     dset->Z = newZ;
 1403     }
 1404 
 1405     if (!err) {
 1406     if (newvars == 1 && x != NULL) {
 1407         /* a single new var, storage pre-allocated */
 1408         newZ[v0] = x;
 1409     } else {
 1410         for (i=0; i<newvars && !err; i++) {
 1411         newZ[v0+i] = malloc(dset->n * sizeof **newZ);
 1412         if (newZ[v0+i] == NULL) {
 1413             err = E_ALLOC;
 1414         }
 1415         }
 1416     }
 1417     }
 1418 
 1419     if (!err && dset != fetch_full_dataset()) {
 1420     /* don't expand varinfo if we're adding a series
 1421        to the full dataset when currently sub-sampled,
 1422        since in that case varinfo is shared between
 1423        the two datasets
 1424     */
 1425     err = dataset_expand_varinfo(v0, newvars, dset);
 1426     }
 1427 
 1428     if (!err) {
 1429     dset->v += newvars;
 1430     }
 1431 
 1432     return err;
 1433 }
 1434 
 1435 /**
 1436  * dataset_add_series:
 1437  * @dset: pointer to dataset.
 1438  * @newvars: number of series to add.
 1439  *
 1440  * Adds space for the specified number of additional series
 1441  * in the dataset. Values are initialized to zero.
 1442  *
 1443  * Returns: 0 on success, E_ALLOC on error.
 1444  */
 1445 
 1446 int dataset_add_series (DATASET *dset, int newvars)
 1447 {
 1448     int v0 = dset->v;
 1449     int err;
 1450 
 1451     if (dset_zcols_borrowed(dset)) {
 1452     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1453     return E_DATA;
 1454     }
 1455 
 1456     err = real_add_series(newvars, NULL, dset);
 1457 
 1458     if (!err) {
 1459     int i, v, t;
 1460 
 1461     for (i=0; i<newvars; i++) {
 1462         v = v0 + i;
 1463         for (t=0; t<dset->n; t++) {
 1464         dset->Z[v][t] = 0.0;
 1465         }
 1466     }
 1467     }
 1468 
 1469     return err;
 1470 }
 1471 
 1472 /**
 1473  * dataset_add_NA_series:
 1474  * @dset: pointer to dataset.
 1475  * @newvars: number of series to add.
 1476  *
 1477  * Adds space for the specified number of additional series
 1478  * in the dataset. Values are initialized to NA.
 1479  *
 1480  * Returns: 0 on success, E_ALLOC on error.
 1481  */
 1482 
 1483 int dataset_add_NA_series (DATASET *dset, int newvars)
 1484 {
 1485     int v0 = dset->v;
 1486     int err;
 1487 
 1488     if (dset_zcols_borrowed(dset)) {
 1489     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1490     return E_DATA;
 1491     }
 1492 
 1493     err = real_add_series(newvars, NULL, dset);
 1494 
 1495     if (!err) {
 1496     int i, v, t;
 1497 
 1498     for (i=0; i<newvars; i++) {
 1499         v = v0 + i;
 1500         for (t=0; t<dset->n; t++) {
 1501         dset->Z[v][t] = NADBL;
 1502         }
 1503     }
 1504     }
 1505 
 1506     return err;
 1507 }
 1508 
 1509 /**
 1510  * dataset_add_allocated_series:
 1511  * @dset: pointer to dataset.
 1512  * @x: one-dimensional data array.
 1513  *
 1514  * Adds @x as an additional series in the dataset.
 1515  * The array @x is not copied; it should be treated as
 1516  * belonging to @dset after this operation.
 1517  *
 1518  * Returns: 0 on success, E_ALLOC on error.
 1519  */
 1520 
 1521 int dataset_add_allocated_series (DATASET *dset, double *x)
 1522 {
 1523     if (dset_zcols_borrowed(dset)) {
 1524     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1525     return E_DATA;
 1526     } else {
 1527     return real_add_series(1, x, dset);
 1528     }
 1529 }
 1530 
 1531 /**
 1532  * dataset_add_series_as:
 1533  * @dset: pointer to dataset.
 1534  * @x: array to be added.
 1535  * @name: name to give the new variable.
 1536  *
 1537  * Adds to the dataset a new series with name @name and
 1538  * values given by @x.  The new variable is added at one
 1539  * level "deeper" (in terms of function execution) than the
 1540  * current level.  This is for use with user-defined functions.
 1541  *
 1542  * Returns: 0 on success, E_ALLOC on error.
 1543  */
 1544 
 1545 int dataset_add_series_as (DATASET *dset, double *x, const char *name)
 1546 {
 1547     int v, t, err = 0;
 1548 
 1549     if (dset_zcols_borrowed(dset)) {
 1550     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1551     return E_DATA;
 1552     }
 1553 
 1554     if (dset->varinfo == NULL) {
 1555     gretl_errmsg_set(_("Please open a data file first"));
 1556     return 1;
 1557     }
 1558 
 1559 #if DDEBUG
 1560     fprintf(stderr, "dataset_add_series_as: incoming Z=%p, name='%s'\n",
 1561         (void *) dset->Z, name);
 1562 #endif
 1563 
 1564     err = real_add_series(1, NULL, dset);
 1565 
 1566     if (!err) {
 1567     v = dset->v - 1;
 1568     for (t=0; t<dset->n; t++) {
 1569         dset->Z[v][t] = x[t];
 1570     }
 1571     strcpy(dset->varname[v], name);
 1572     dset->varinfo[v]->stack_level += 1;
 1573     }
 1574 
 1575     return err;
 1576 }
 1577 
 1578 /**
 1579  * dataset_copy_series_as:
 1580  * @dset: pointer to dataset.
 1581  * @v: index number of variable to copy.
 1582  * @name: name to give the copy.
 1583  *
 1584  * Makes a copy of series @v under the name @name.
 1585  * The copy exists in a variable namespace one level "deeper"
 1586  * (in terms of function execution) than the variable being copied.
 1587  * This is for use with user-defined functions: a variable
 1588  * supplied to a function as an argument is copied into the
 1589  * function's namespace under the name it was given as a
 1590  * parameter.
 1591  *
 1592  * Returns: 0 on success, E_ALLOC on error.
 1593  */
 1594 
 1595 int dataset_copy_series_as (DATASET *dset, int v, const char *name)
 1596 {
 1597     int t, err;
 1598 
 1599     err = real_add_series(1, NULL, dset);
 1600 
 1601     if (!err) {
 1602     int vnew = dset->v - 1;
 1603 
 1604     for (t=0; t<dset->n; t++) {
 1605         dset->Z[vnew][t] = dset->Z[v][t];
 1606     }
 1607     strcpy(dset->varname[vnew], name);
 1608     copy_varinfo(dset->varinfo[vnew], dset->varinfo[v]);
 1609     if (dset->varinfo[v]->flags & VAR_LISTARG) {
 1610         dset->varinfo[vnew]->flags &= ~VAR_LISTARG;
 1611     }
 1612     dset->varinfo[vnew]->stack_level = gretl_function_depth() + 1;
 1613 #if 0
 1614     fprintf(stderr, "copied var %d ('%s', level %d) as var %d ('%s', level %d): ",
 1615         v, dset->varname[v], dset->varinfo[v]->stack_level,
 1616         vnew, name, dset->varinfo[vnew]->stack_level);
 1617     fprintf(stderr, "Z[%d][0] = %g\n", vnew, dset->Z[vnew][0]);
 1618 #endif
 1619     }
 1620 
 1621     return err;
 1622 }
 1623 
 1624 enum {
 1625     DROP_NORMAL,
 1626     DROP_SPECIAL
 1627 };
 1628 
 1629 /* DROP_SPECIAL is used when deleting variables from the "full" shadow
 1630    of a sub-sampled dataset, after deleting those same variables from
 1631    the sub-sampled version: in that case we don't mess with the
 1632    pointer elements of the DATASET struct, because these are shared
 1633    between the full and sub-sampled versions.
 1634 */
 1635 
 1636 static int shrink_dataset_to_size (DATASET *dset, int nv, int drop)
 1637 {
 1638     double **newZ;
 1639 
 1640 #if DDEBUG
 1641     fprintf(stderr, "shrink_dataset_to_size: dset at %p, dset->v=%d, nv=%d\n"
 1642         " drop = %s\n", (void *) dset, dset->v, nv,
 1643         (drop == DROP_NORMAL)? "DROP_NORMAL" : "DROP_SPECIAL");
 1644 #endif
 1645 
 1646     if (drop == DROP_NORMAL) {
 1647     char **varname;
 1648     VARINFO **varinfo;
 1649 
 1650     varname = realloc(dset->varname, nv * sizeof *varname);
 1651     if (varname == NULL) {
 1652         return E_ALLOC;
 1653     }
 1654     dset->varname = varname;
 1655 
 1656     varinfo = realloc(dset->varinfo, nv * sizeof *varinfo);
 1657     if (varinfo == NULL) {
 1658         return E_ALLOC;
 1659     }
 1660     dset->varinfo = varinfo;
 1661     }
 1662 
 1663     newZ = realloc(dset->Z, nv * sizeof *newZ);
 1664     if (newZ == NULL) {
 1665     return E_ALLOC;
 1666     }
 1667 
 1668     dset->Z = newZ;
 1669     dset->v = nv;
 1670 
 1671     return 0;
 1672 }
 1673 
 1674 static int vars_renumbered (const int *list, DATASET *dset,
 1675                 int dmin)
 1676 {
 1677     int i, ndel = 0;
 1678 
 1679     for (i=dmin; i<dset->v; i++) {
 1680     if (in_gretl_list(list, i)) {
 1681         ndel++;
 1682     } else if (ndel > 0 && !series_is_hidden(dset, i)) {
 1683         return 1;
 1684     }
 1685     }
 1686 
 1687     return 0;
 1688 }
 1689 
 1690 int overwrite_err (const char *name)
 1691 {
 1692     gretl_errmsg_sprintf(_("The variable %s is read-only"), name);
 1693 
 1694     return E_DATA;
 1695 }
 1696 
 1697 /**
 1698  * series_is_parent:
 1699  * @dset: dataset information.
 1700  * @v: ID number of series to test.
 1701  *
 1702  * Returns: 1 if variable @v is "parent" to a transformed
 1703  * variable (e.g. a log, lag or difference), othewise 0.
 1704  */
 1705 
 1706 int series_is_parent (const DATASET *dset, int v)
 1707 {
 1708     const char *s = dset->varname[v];
 1709     int i;
 1710 
 1711     if (*s == '\0') {
 1712     return 0;
 1713     }
 1714 
 1715     for (i=1; i<dset->v; i++) {
 1716     if (i != v && !strcmp(s, dset->varinfo[i]->parent)) {
 1717         return 1;
 1718     }
 1719     }
 1720 
 1721     return 0;
 1722 }
 1723 
 1724 /**
 1725  * dataset_rename_series:
 1726  * @dset: dataset information.
 1727  * @v: ID number of the series to be renamed.
 1728  * @name: new name to give the series.
 1729  *
 1730  * Returns: 0 on success, non-zero on error.
 1731  */
 1732 
 1733 int dataset_rename_series (DATASET *dset, int v, const char *name)
 1734 {
 1735     int err = 0;
 1736 
 1737     if (v <= 0 || v >= dset->v || name == NULL) {
 1738     err = E_DATA;
 1739     } else {
 1740     err = check_varname(name);
 1741     }
 1742 
 1743     if (!err) {
 1744     GretlType type;
 1745 
 1746     type = user_var_get_type_by_name(name);
 1747     if (type != GRETL_TYPE_NONE) {
 1748         gretl_errmsg_set("There is already an object of this name");
 1749         err = E_DATA;
 1750     }
 1751     }
 1752 
 1753     if (!err && current_series_index(dset, name) >= 0) {
 1754     gretl_errmsg_set("There is already a series of this name");
 1755     err = E_DATA;
 1756     }
 1757 
 1758     if (!err && (object_is_const(dset->varname[v], v) ||
 1759          series_is_parent(dset, v))) {
 1760     err = overwrite_err(dset->varname[v]);
 1761     }
 1762 
 1763     if (!err && strcmp(dset->varname[v], name)) {
 1764     dset->varname[v][0] = '\0';
 1765     strncat(dset->varname[v], name, VNAMELEN-1);
 1766     set_dataset_is_changed();
 1767     }
 1768 
 1769     return err;
 1770 }
 1771 
 1772 /**
 1773  * dataset_replace_series:
 1774  * @dset: pointer to dataset.
 1775  * @v: ID number of the series to be replaced.
 1776  * @x: replacement values.
 1777  * @descrip: replacement description.
 1778  * @flag: if = DS_GRAB_VALUES then replace dset->Z[@v]
 1779  * with @x, otherwise copy the values in @x to dset->Z[@v].
 1780  *
 1781  * Replaces the description and numerical content of
 1782  * series @v with the information provided.
 1783  *
 1784  * Returns: 0 on success, non-zero on error.
 1785  */
 1786 
 1787 int dataset_replace_series (DATASET *dset, int v,
 1788                 double *x, const char *descrip,
 1789                 DataCopyFlag flag)
 1790 {
 1791     if (v < 0 || v >= dset->v) {
 1792     /* out of bounds */
 1793     return E_DATA;
 1794     }
 1795 
 1796     if (object_is_const(dset->varname[v], v) ||
 1797     series_is_parent(dset, v)) {
 1798     return overwrite_err(dset->varname[v]);
 1799     }
 1800 
 1801     gretl_varinfo_init(dset->varinfo[v]);
 1802     series_set_label(dset, v, descrip);
 1803 
 1804     if (flag == DS_GRAB_VALUES) {
 1805     free(dset->Z[v]);
 1806     dset->Z[v] = x;
 1807     } else {
 1808     int t;
 1809 
 1810     for (t=0; t<dset->n; t++) {
 1811         dset->Z[v][t] = x[t];
 1812     }
 1813     }
 1814 
 1815     set_dataset_is_changed();
 1816 
 1817     return 0;
 1818 }
 1819 
 1820 /**
 1821  * dataset_replace_series_data:
 1822  * @dset: pointer to dataset.
 1823  * @v: ID number of the series to be modified.
 1824  * @x: replacement values.
 1825  * @t1: start of sample range.
 1826  * @t2: end of sample range.
 1827  * @descrip: replacement description.
 1828  *
 1829  * Replaces the description and numerical content of
 1830  * series @v over the given sample range, with the
 1831  * information provided.
 1832  *
 1833  * Returns: 0 on success, non-zero on error.
 1834  */
 1835 
 1836 int dataset_replace_series_data (DATASET *dset, int v,
 1837                  const double *x,
 1838                  int t1, int t2,
 1839                  const char *descrip)
 1840 {
 1841     int t, s;
 1842 
 1843     if (v < 0 || v >= dset->v) {
 1844     /* out of bounds */
 1845     return E_DATA;
 1846     }
 1847 
 1848     if (object_is_const(dset->varname[v], v) ||
 1849     series_is_parent(dset, v)) {
 1850     return overwrite_err(dset->varname[v]);
 1851     }
 1852 
 1853     gretl_varinfo_init(dset->varinfo[v]);
 1854     series_set_label(dset, v, descrip);
 1855 
 1856     s = 0;
 1857     for (t=t1; t<=t2; t++) {
 1858     dset->Z[v][t] = x[s++];
 1859     }
 1860 
 1861     set_dataset_is_changed();
 1862 
 1863     return 0;
 1864 }
 1865 
 1866 static int real_drop_listed_vars (int *list, DATASET *dset,
 1867                   int *renumber, int drop,
 1868                   PRN *prn)
 1869 {
 1870     int oldv = dset->v, vmax = dset->v;
 1871     char vname[VNAMELEN] = {0};
 1872     int d0, d1;
 1873     int delmin = oldv;
 1874     int i, v, ndel = 0;
 1875     int err = 0;
 1876 
 1877     if (renumber != NULL) {
 1878     *renumber = 0;
 1879     }
 1880 
 1881     if (list == NULL || list[0] == 0) {
 1882     /* no-op */
 1883     return 0;
 1884     }
 1885 
 1886     d0 = list[0];
 1887 
 1888     check_variable_deletion_list(list, dset);
 1889     d1 = list[0];
 1890     if (prn != NULL && d1 == 1) {
 1891     strcpy(vname, dset->varname[list[1]]);
 1892     }
 1893 
 1894     if (d1 == 0) {
 1895     goto finish;
 1896     }
 1897 
 1898 #if DDEBUG
 1899     fprintf(stderr, "real_drop_listed_variables: dropping %d vars:\n",
 1900         list[0]);
 1901 #endif
 1902 
 1903     /* check that no vars to be deleted are marked "const", and do
 1904        some preliminary accounting while we're at it */
 1905 
 1906     for (i=1; i<=list[0]; i++) {
 1907     v = list[i];
 1908     if (v > 0 && v < oldv) {
 1909         if (object_is_const(dset->varname[v], v)) {
 1910         return overwrite_err(dset->varname[v]);
 1911         }
 1912         if (v < delmin) {
 1913         delmin = v;
 1914         }
 1915         ndel++;
 1916     }
 1917     }
 1918 
 1919     if (ndel == 0) {
 1920     return 0;
 1921     }
 1922 
 1923     if (renumber != NULL) {
 1924     *renumber = vars_renumbered(list, dset, delmin);
 1925     }
 1926 
 1927 #if DDEBUG
 1928     fprintf(stderr, "real_drop_listed_variables: lowest ID of deleted var"
 1929         " = %d\n", delmin);
 1930 #endif
 1931 
 1932     /* free and set to NULL all the vars to be deleted */
 1933     for (i=1; i<=list[0]; i++) {
 1934     v = list[i];
 1935     if (v > 0 && v < oldv) {
 1936         free(dset->Z[v]);
 1937         dset->Z[v] = NULL;
 1938         if (drop == DROP_NORMAL) {
 1939         free(dset->varname[v]);
 1940         free(dset->varinfo[v]);
 1941         }
 1942     }
 1943     }
 1944 
 1945     /* repack pointers if necessary */
 1946 
 1947     for (v=1; v<vmax; v++) {
 1948     if (dset->Z[v] == NULL) {
 1949         int gap = 1;
 1950 
 1951         for (i=v+1; i<vmax; i++) {
 1952         if (dset->Z[i] == NULL) {
 1953             gap++;
 1954         } else {
 1955             break;
 1956         }
 1957         }
 1958 
 1959         if (i < vmax) {
 1960         vmax -= gap;
 1961         for (i=v; i<vmax; i++) {
 1962             if (drop == DROP_NORMAL) {
 1963             dset->varname[i] = dset->varname[i + gap];
 1964             dset->varinfo[i] = dset->varinfo[i + gap];
 1965             }
 1966             dset->Z[i] = dset->Z[i + gap];
 1967         }
 1968         } else {
 1969         /* deleting all subsequent vars: done */
 1970         break;
 1971         }
 1972     }
 1973     }
 1974 
 1975     err = shrink_dataset_to_size(dset, oldv - ndel, drop);
 1976 
 1977  finish:
 1978 
 1979     /* report results, if appropriate */
 1980 
 1981     if (!err && prn != NULL) {
 1982     if (d0 == d1) {
 1983         if (gretl_messages_on()) {
 1984         if (*vname != '\0') {
 1985             pprintf(prn, _("Deleted %s"), vname);
 1986         } else {
 1987             pprintf(prn, _("Deleted %d variables"), d1);
 1988         }
 1989         pputc(prn, '\n');
 1990         }
 1991     } else {
 1992         if (d1 == 0) {
 1993         pputs(prn, _("No variables were deleted"));
 1994         } else if (*vname != '\0') {
 1995         pprintf(prn, _("Deleted %s"), vname);
 1996         } else {
 1997         pprintf(prn, _("Deleted %d variables"), d1);
 1998         }
 1999         pprintf(prn, " (%s)\n", _("some data were in use"));
 2000     }
 2001     }
 2002 
 2003     return err;
 2004 }
 2005 
 2006 static int *make_dollar_list (DATASET *dset, int *err)
 2007 {
 2008     int *list = NULL;
 2009     int i;
 2010 
 2011     for (i=1; i<dset->v; i++) {
 2012     if (dset->varname[i][0] == '$') {
 2013         list = gretl_list_append_term(&list, i);
 2014         if (list == NULL) {
 2015         *err = E_ALLOC;
 2016         break;
 2017         }
 2018     }
 2019     }
 2020 
 2021     return list;
 2022 }
 2023 
 2024 /**
 2025  * dataset_drop_listed_variables:
 2026  * @list: list of variable to drop, by ID number.
 2027  * @dset: pointer to dataset.
 2028  * @renumber: location for return of information on whether
 2029  * remaining variables have been renumbered as a result, or
 2030  * NULL.
 2031  * @prn: pointer to printing struct.
 2032  *
 2033  * Deletes the variables given in @list from the dataset.  Remaining
 2034  * variables may have their ID numbers changed as a consequence. If
 2035  * @renumber is not NULL, this location receives 1 in case variables
 2036  * have been renumbered, 0 otherwise.
 2037  *
 2038  * Returns: 0 on success, E_ALLOC on error.
 2039  */
 2040 
 2041 int dataset_drop_listed_variables (int *list,
 2042                    DATASET *dset,
 2043                    int *renumber,
 2044                    PRN *prn)
 2045 {
 2046     int oldv = dset->v;
 2047     int *dlist = NULL;
 2048     int dupv, free_dlist = 0;
 2049     int err = 0;
 2050 
 2051     if (dset->n == 0 || dset->v == 0) {
 2052     return E_NODATA;
 2053     }
 2054 
 2055     if (dset_zcols_borrowed(dset)) {
 2056     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2057     return E_DATA;
 2058     }
 2059 
 2060     if (list == NULL) {
 2061     /* signal to drop internal "$" variables */
 2062     dlist = make_dollar_list(dset, &err);
 2063     if (err) {
 2064         return err;
 2065     } else if (dlist == NULL) {
 2066         /* no-op */
 2067         return 0;
 2068     }
 2069     free_dlist = 1;
 2070     } else if (list[0] == 0) {
 2071     /* no-op */
 2072     return 0;
 2073     } else {
 2074     dlist = list;
 2075     }
 2076 
 2077     dupv = gretl_list_duplicates(dlist, DELEET);
 2078     if (dupv >= 0) {
 2079     gretl_errmsg_sprintf(_("variable %d duplicated in the "
 2080                    "command list."), dupv);
 2081     return E_DATA;
 2082     }
 2083 
 2084     err = real_drop_listed_vars(dlist, dset, renumber,
 2085                 DROP_NORMAL, prn);
 2086 
 2087     if (dlist[0] > 0) {
 2088     if (!err && !dset->auxiliary) {
 2089         err = gretl_lists_revise(dlist, 0);
 2090     }
 2091 
 2092     if (!err && complex_subsampled()) {
 2093         DATASET *fdset = fetch_full_dataset();
 2094 
 2095         err = real_drop_listed_vars(dlist, fdset, NULL,
 2096                     DROP_SPECIAL, NULL);
 2097     }
 2098     }
 2099 
 2100     if (free_dlist) {
 2101     free(dlist);
 2102     } else if (dset->v != oldv) {
 2103     set_dataset_is_changed();
 2104     }
 2105 
 2106     return err;
 2107 }
 2108 
 2109 /**
 2110  * dataset_drop_variable:
 2111  * @v: ID number of variable to drop.
 2112  * @dset: pointer to dataset.
 2113  *
 2114  * Deletes variable @v from the dataset.
 2115  *
 2116  * Returns: 0 on success, E_ALLOC on error.
 2117  */
 2118 
 2119 int dataset_drop_variable (int v, DATASET *dset)
 2120 {
 2121     int list[2] = {1, v};
 2122 
 2123     if (v <= 0 || v >= dset->v) {
 2124     return E_DATA;
 2125     }
 2126 
 2127     if (dset_zcols_borrowed(dset)) {
 2128     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2129     return E_DATA;
 2130     }
 2131 
 2132     return dataset_drop_listed_variables(list, dset, NULL, NULL);
 2133 }
 2134 
 2135 /**
 2136  * dataset_renumber_variable:
 2137  * @v_old: original ID number of variable.
 2138  * @v_new: new ID number.
 2139  * @dset: dataset information.
 2140  *
 2141  * Moves the variable that was originally at position @v_old
 2142  * in the datset to position @v_new, renumbering other
 2143  * variables as required.
 2144  *
 2145  * Returns: 0 on success, error code on error;
 2146  */
 2147 
 2148 int dataset_renumber_variable (int v_old, int v_new,
 2149                    DATASET *dset)
 2150 {
 2151     double *x;
 2152     VARINFO *vinfo;
 2153     char vname[VNAMELEN];
 2154     int i;
 2155 
 2156     if (complex_subsampled()) {
 2157     /* too tricky */
 2158     gretl_errmsg_set(_("dataset is subsampled"));
 2159     return E_DATA;
 2160     }
 2161 
 2162     if (dset_zcols_borrowed(dset)) {
 2163     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2164     return E_DATA;
 2165     }
 2166 
 2167     if (v_old < 1 || v_old > dset->v - 1 ||
 2168     v_new < 1 || v_new > dset->v - 1) {
 2169     /* out of bounds */
 2170     return E_DATA;
 2171     }
 2172 
 2173     if (v_old == v_new) {
 2174     /* no-op */
 2175     return 0;
 2176     }
 2177 
 2178     x = dset->Z[v_old];
 2179     vinfo = dset->varinfo[v_old];
 2180     strcpy(vname, dset->varname[v_old]);
 2181 
 2182     if (v_new < v_old) {
 2183     /* moving up in ordering */
 2184     for (i=v_old; i>v_new; i--) {
 2185         dset->Z[i] = dset->Z[i-1];
 2186         strcpy(dset->varname[i], dset->varname[i-1]);
 2187         dset->varinfo[i] = dset->varinfo[i-1];
 2188     }
 2189     } else {
 2190     /* moving down in ordering */
 2191     for (i=v_old; i<v_new; i++) {
 2192         dset->Z[i] = dset->Z[i+1];
 2193         strcpy(dset->varname[i], dset->varname[i+1]);
 2194         dset->varinfo[i] = dset->varinfo[i+1];
 2195     }
 2196     }
 2197 
 2198     dset->Z[v_new] = x;
 2199     strcpy(dset->varname[v_new], vname);
 2200     dset->varinfo[v_new] = vinfo;
 2201 
 2202     set_dataset_is_changed();
 2203 
 2204     return 0;
 2205 }
 2206 
 2207 /**
 2208  * dataset_destroy_hidden_variables:
 2209  * @dset: pointer to dataset.
 2210  * @vmin: do not drop variables with ID numbers less than this.
 2211  *
 2212  * Deletes from the dataset any "hidden" variables that have
 2213  * been added automatically (for example, auto-generated variables
 2214  * used for the x-axis in graph plotting), and that have ID
 2215  * numbers greater than or equal to @vmin.  Never deletes the
 2216  * automatically generated constant (ID number 0).
 2217  *
 2218  * Returns: 0 on success, E_ALLOC on error.
 2219  */
 2220 
 2221 int dataset_destroy_hidden_variables (DATASET *dset, int vmin)
 2222 {
 2223     int i, nhid = 0;
 2224     int err = 0;
 2225 
 2226     if (vmin <= 1) vmin = 1;
 2227 
 2228     for (i=vmin; i<dset->v; i++) {
 2229     if (series_is_hidden(dset, i)) {
 2230         nhid++;
 2231     }
 2232     }
 2233 
 2234     if (nhid > 0) {
 2235     int *list = gretl_list_new(nhid);
 2236 
 2237     if (list == NULL) {
 2238         err = 1;
 2239     } else {
 2240         int j = 1;
 2241 
 2242         for (i=vmin; i<dset->v; i++) {
 2243         if (series_is_hidden(dset, i)) {
 2244             list[j++] = i;
 2245         }
 2246         }
 2247         err = dataset_drop_listed_variables(list, dset, NULL, NULL);
 2248         free(list);
 2249     }
 2250     }
 2251 
 2252     return err;
 2253 }
 2254 
 2255 int dataset_set_matrix_name (DATASET *dset, const char *name)
 2256 {
 2257     int err = 0;
 2258 
 2259     if (dset->descrip != NULL) {
 2260     free(dset->descrip);
 2261     dset->descrip = NULL;
 2262     }
 2263 
 2264     if (name != NULL && *name != '\0') {
 2265     dset->descrip = malloc(strlen(name) + 8);
 2266     if (dset->descrip == NULL) {
 2267         err = E_ALLOC;
 2268     } else {
 2269         sprintf(dset->descrip, "matrix:%s", name);
 2270     }
 2271     }
 2272 
 2273     return err;
 2274 }
 2275 
 2276 const char *dataset_get_matrix_name (const DATASET *dset)
 2277 {
 2278     if (dset->descrip != NULL &&
 2279     strlen(dset->descrip) > 7 &&
 2280     !strncmp(dset->descrip, "matrix:", 7)) {
 2281     return dset->descrip + 7;
 2282     } else {
 2283     return NULL;
 2284     }
 2285 }
 2286 
 2287 const char *dataset_period_label (const DATASET *dset)
 2288 {
 2289     if (dset == NULL) {
 2290     return _("periods");
 2291     } else if (quarterly_or_monthly(dset)) {
 2292     return dset->pd == 4 ? _("quarters") : _("months");
 2293     } else if (annual_data(dset)) {
 2294     return _("years");
 2295     } else if (dataset_is_weekly(dset)) {
 2296     return _("weeks");
 2297     } else if (dataset_is_daily(dset)) {
 2298     return _("days");
 2299     } else if (dataset_is_hourly(dset)) {
 2300     return _("hours");
 2301     } else {
 2302     return _("periods");
 2303     }
 2304 }
 2305 
 2306 /* intended for use with newly imported data: trash any
 2307    series that contain nothing but NAs
 2308 */
 2309 
 2310 int maybe_prune_dataset (DATASET **pdset, void *p)
 2311 {
 2312     DATASET *dset = *pdset;
 2313     int allmiss, prune = 0, err = 0;
 2314     int i, t;
 2315 
 2316     for (i=1; i<dset->v; i++) {
 2317     allmiss = 1;
 2318     for (t=0; t<dset->n; t++) {
 2319         if (!na(dset->Z[i][t])) {
 2320         allmiss = 0;
 2321         break;
 2322         }
 2323     }
 2324     if (allmiss) {
 2325         prune = 1;
 2326         break;
 2327     }
 2328     }
 2329 
 2330     if (prune) {
 2331     char *mask = calloc(dset->v, 1);
 2332     DATASET *newset = NULL;
 2333     int ndrop = 0;
 2334 
 2335     if (mask == NULL) {
 2336         return E_ALLOC;
 2337     }
 2338 
 2339     for (i=1; i<dset->v; i++) {
 2340         allmiss = 1;
 2341         for (t=0; t<dset->n; t++) {
 2342         if (!na(dset->Z[i][t])) {
 2343             allmiss = 0;
 2344             break;
 2345         }
 2346         }
 2347         if (allmiss) {
 2348         mask[i] = 1;
 2349         ndrop++;
 2350         }
 2351     }
 2352 
 2353     newset = datainfo_new();
 2354     if (newset == NULL) {
 2355         err = E_ALLOC;
 2356     } else {
 2357         newset->v = dset->v - ndrop;
 2358         newset->n = dset->n;
 2359         err = start_new_Z(newset, 0);
 2360     }
 2361 
 2362     if (!err) {
 2363         gretl_string_table *st = (gretl_string_table *) p;
 2364         size_t ssize = dset->n * sizeof **newset->Z;
 2365         int k = 1;
 2366 
 2367         for (i=1; i<dset->v; i++) {
 2368         if (!mask[i]) {
 2369             memcpy(newset->Z[k], dset->Z[i], ssize);
 2370             strcpy(newset->varname[k], dset->varname[i]);
 2371             copy_label(&newset->varinfo[k]->label,
 2372                    dset->varinfo[i]->label);
 2373             if (st != NULL && k < i) {
 2374             gretl_string_table_reset_column_id(st, i, k);
 2375             }
 2376             k++;
 2377         }
 2378         }
 2379 
 2380         destroy_dataset(dset);
 2381         *pdset = newset;
 2382 
 2383         fprintf(stderr, "pruned dataset to %d variables\n", newset->v);
 2384     }
 2385 
 2386     free(mask);
 2387     }
 2388 
 2389     return err;
 2390 }
 2391 
 2392 /* apparatus for sorting entire dataset */
 2393 
 2394 typedef struct spoint_t_ spoint_t;
 2395 
 2396 struct spoint_t_ {
 2397     int obsnum;
 2398     int nvals;
 2399     double *vals;
 2400 };
 2401 
 2402 static void free_spoints (spoint_t *sv, int n)
 2403 {
 2404     int i;
 2405 
 2406     for (i=0; i<n; i++) {
 2407     free(sv[i].vals);
 2408     }
 2409 
 2410     free(sv);
 2411 }
 2412 
 2413 static spoint_t *allocate_spoints (int n, int v)
 2414 {
 2415     spoint_t *sv;
 2416     int i;
 2417 
 2418     sv = malloc(n * sizeof *sv);
 2419 
 2420     if (sv != NULL) {
 2421     for (i=0; i<n; i++) {
 2422         sv[i].vals = NULL;
 2423     }
 2424     for (i=0; i<n; i++) {
 2425         sv[i].vals = malloc(v * sizeof(double));
 2426         if (sv[i].vals == NULL) {
 2427         free_spoints(sv, n);
 2428         sv = NULL;
 2429         break;
 2430         }
 2431     }
 2432     }
 2433 
 2434     return sv;
 2435 }
 2436 
 2437 static int compare_vals_up (const void *a, const void *b)
 2438 {
 2439     const spoint_t *pa = (const spoint_t *) a;
 2440     const spoint_t *pb = (const spoint_t *) b;
 2441     int i, ret = 0;
 2442 
 2443     for (i=0; i<pa->nvals && !ret; i++) {
 2444     if (isnan(pa->vals[i]) || isnan(pb->vals[i])) {
 2445         if (!isnan(pa->vals[i])) {
 2446         ret = -1;
 2447         } else if (!isnan(pb->vals[i])) {
 2448         ret = 1;
 2449         }
 2450     } else {
 2451         ret = (pa->vals[i] > pb->vals[i]) - (pa->vals[i] < pb->vals[i]);
 2452     }
 2453     }
 2454 
 2455     return ret;
 2456 }
 2457 
 2458 static int compare_vals_down (const void *a, const void *b)
 2459 {
 2460     const spoint_t *pa = (const spoint_t *) a;
 2461     const spoint_t *pb = (const spoint_t *) b;
 2462     int i, ret = 0;
 2463 
 2464     for (i=0; i<pa->nvals && !ret; i++) {
 2465     if (isnan(pa->vals[i]) || isnan(pb->vals[i])) {
 2466         if (!isnan(pa->vals[i])) {
 2467         ret = 1;
 2468         } else if (!isnan(pb->vals[i])) {
 2469         ret = -1;
 2470         }
 2471     } else {
 2472         ret = (pa->vals[i] < pb->vals[i]) - (pa->vals[i] > pb->vals[i]);
 2473     }
 2474     }
 2475 
 2476     return ret;
 2477 }
 2478 
 2479 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt)
 2480 {
 2481     spoint_t *sv = NULL;
 2482     double *x = NULL;
 2483     char **S = NULL;
 2484     int ns = list[0];
 2485     int i, t, v;
 2486     int err = 0;
 2487 
 2488     sv = allocate_spoints(dset->n, ns);
 2489     if (sv == NULL) {
 2490     return E_ALLOC;
 2491     }
 2492 
 2493     x = malloc(dset->n * sizeof *x);
 2494     if (x == NULL) {
 2495     free_spoints(sv, dset->n);
 2496     return E_ALLOC;
 2497     }
 2498 
 2499     if (dset->S != NULL) {
 2500     S = strings_array_new_with_length(dset->n, OBSLEN);
 2501     if (S == NULL) {
 2502         err = E_ALLOC;
 2503         goto bailout;
 2504     }
 2505     }
 2506 
 2507     for (t=0; t<dset->n; t++) {
 2508     sv[t].obsnum = t;
 2509     for (i=0; i<ns; i++) {
 2510         v = list[i+1];
 2511         sv[t].vals[i] = dset->Z[v][t];
 2512         sv[t].nvals = ns;
 2513     }
 2514     }
 2515 
 2516     if (opt & OPT_D) {
 2517     /* descending */
 2518     qsort(sv, dset->n, sizeof *sv, compare_vals_down);
 2519     } else {
 2520     qsort(sv, dset->n, sizeof *sv, compare_vals_up);
 2521     }
 2522 
 2523     for (i=1; i<dset->v; i++) {
 2524     for (t=0; t<dset->n; t++) {
 2525         x[t] = dset->Z[i][sv[t].obsnum];
 2526     }
 2527     for (t=0; t<dset->n; t++) {
 2528         dset->Z[i][t] = x[t];
 2529     }
 2530     }
 2531 
 2532     if (S != NULL) {
 2533     for (t=0; t<dset->n; t++) {
 2534         strcpy(S[t], dset->S[sv[t].obsnum]);
 2535     }
 2536     strings_array_free(dset->S, dset->n);
 2537     dset->S = S;
 2538     }
 2539 
 2540  bailout:
 2541 
 2542     free_spoints(sv, dset->n);
 2543     free(x);
 2544 
 2545     return err;
 2546 }
 2547 
 2548 static int dataset_sort (DATASET *dset, const int *list,
 2549              gretlopt opt)
 2550 {
 2551     if (dataset_is_time_series(dset) ||
 2552     dataset_is_panel(dset)) {
 2553     gretl_errmsg_set("You can only do this with undated data");
 2554     return E_DATA;
 2555     }
 2556 
 2557     if (list == NULL || list[0] < 1) {
 2558     return E_DATA;
 2559     }
 2560 
 2561     return dataset_sort_by(dset, list, opt);
 2562 }
 2563 
 2564 /**
 2565  * dataset_drop_last_variables:
 2566  * @dset: pointer to dataset.
 2567  * @delvars: number of variables to be dropped.
 2568  *
 2569  * Deletes from the dataset the number @delvars of variables
 2570  * that were added most recently (that have the highest ID numbers).
 2571  *
 2572  * Returns: 0 on success, E_ALLOC on error.
 2573  */
 2574 
 2575 int dataset_drop_last_variables (DATASET *dset, int delvars)
 2576 {
 2577     int newv = dset->v - delvars;
 2578     int i, err = 0;
 2579 
 2580     if (delvars <= 0) {
 2581     return 0;
 2582     }
 2583 
 2584 #if FULLDEBUG
 2585     fprintf(stderr, "*** dataset_drop_last_variables: dropping %d, newv = %d\n",
 2586         delvars, newv);
 2587 #endif
 2588 
 2589     if (newv < 1) {
 2590     fprintf(stderr, "dataset_drop_last_vars: dset->v = %d, delvars = %d "
 2591         " -> newv = %d\n (dset = %p)\n", dset->v, delvars,
 2592         newv, (void *) dset);
 2593     return E_DATA;
 2594     }
 2595 
 2596 #if FULLDEBUG
 2597     for (i=0; i<dset->v; i++) {
 2598     if (dset->Z[i] == NULL) {
 2599         fprintf(stderr, "var %d (%s, level %d, val = NULL) %s\n",
 2600             i, dset->varname[i], dset->varinfo[i]->stack_level,
 2601             (i >= newv)? "deleting" : "");
 2602     } else {
 2603         fprintf(stderr, "var %d (%s, level %d, val[0] = %g) %s\n",
 2604             i, dset->varname[i], dset->varinfo[i]->stack_level,
 2605             dset->Z[i][0], (i >= newv)? "deleting" : "");
 2606     }
 2607     }
 2608 #endif
 2609 
 2610 #if 0
 2611     fprintf(stderr, "dataset_drop_last_variables: origv=%d, newv=%d\n",
 2612         dset->v, newv);
 2613     for (i=1; i<dset->v; i++) {
 2614     fprintf(stderr, "before: var[%d] = '%s'\n", i, dset->varname[i]);
 2615     }
 2616 #endif
 2617 
 2618     for (i=newv; i<dset->v; i++) {
 2619     free(dset->varname[i]);
 2620     free_varinfo(dset, i);
 2621     free(dset->Z[i]);
 2622     dset->Z[i] = NULL;
 2623     }
 2624 
 2625     err = shrink_dataset_to_size(dset, newv, DROP_NORMAL);
 2626 
 2627 #if 0
 2628     for (i=1; i<dset->v; i++) {
 2629     fprintf(stderr, "after: var[%d] = '%s'\n", i, dset->varname[i]);
 2630     }
 2631 #endif
 2632 
 2633     if (!err && !dset->auxiliary) {
 2634     err = gretl_lists_revise(NULL, newv);
 2635     }
 2636 
 2637     if (!err && complex_subsampled()) {
 2638     DATASET *fset = fetch_full_dataset();
 2639 
 2640     /*
 2641        Context: we're deleting @delvars variables at the end of
 2642        dset->Z, leaving @newv variables.  The dataset is currently
 2643        subsampled.
 2644 
 2645        Question: should we delete any variables at the end of
 2646        fset->Z to keep the two arrays in sync?
 2647 
 2648        If @newv < fset->v, this must mean that at least some of
 2649        the extra vars we're deleting from the current sub-sampled
 2650        Z have already been synced to the full Z, so we should do
 2651        the deletion from full Z.
 2652     */
 2653 
 2654     if (newv < fset->v) {
 2655 #if FULLDEBUG
 2656         fprintf(stderr, "prior fset->v = %d: shrinking full Z to %d vars\n",
 2657             fset->v, newv);
 2658 #endif
 2659         for (i=newv; i<fset->v; i++) {
 2660         free(fset->Z[i]);
 2661         fset->Z[i] = NULL;
 2662         }
 2663         err = shrink_dataset_to_size(fset, newv, DROP_SPECIAL);
 2664     }
 2665     }
 2666 
 2667     return err;
 2668 }
 2669 
 2670 /* Apparatus for stacking variables (e.g. in case of panel
 2671    data that were read in "wrongly").
 2672 */
 2673 
 2674 static int missing_tail (const double *x, int n)
 2675 {
 2676     int i, nmiss = 0;
 2677 
 2678     for (i=n-1; i>=0; i--) {
 2679     if (na(x[i])) {
 2680         nmiss++;
 2681     } else {
 2682         break;
 2683     }
 2684     }
 2685 
 2686     return nmiss;
 2687 }
 2688 
 2689 /**
 2690  * build_stacked_series:
 2691  * @pstack: location for returning stacked series.
 2692  * @list: list of series to be stacked.
 2693  * @length: number of observations to use per input series (or 0 for auto).
 2694  * @offset: offset at which to start drawing observations.
 2695  * @dset: pointer to dataset.
 2696  *
 2697  * Really for internal use. Don't worry about it.
 2698  *
 2699  * Returns: 0 on success, non-zero code on error.
 2700  */
 2701 
 2702 int build_stacked_series (double **pstack, int *list,
 2703               int length, int offset,
 2704               DATASET *dset)
 2705 {
 2706     double *xstack = NULL;
 2707     int nv, oldn, bign;
 2708     int i, err = 0;
 2709 
 2710     if (dset == NULL || dset->n == 0) {
 2711     return E_NODATA;
 2712     } else if (list == NULL || list[0] <= 0) {
 2713     return E_INVARG;
 2714     } else if (length + offset > dset->n) {
 2715     return E_INVARG;
 2716     }
 2717 
 2718     nv = list[0];
 2719 
 2720 #if PDEBUG
 2721     fprintf(stderr, "nv = %d, length = %d, offset = %d\n", nv, length, offset);
 2722 #endif
 2723 
 2724     if (length > 0) {
 2725     bign = nv * length;
 2726     if (bign < dset->n) {
 2727         bign = dset->n;
 2728     }
 2729     } else {
 2730     /* calculate required series length */
 2731     length = 0;
 2732     for (i=0; i<nv; i++) {
 2733         int j = list[i+1];
 2734         int ok = dset->n - missing_tail(dset->Z[j], dset->n); /* ?? */
 2735 
 2736         if (ok > length) {
 2737         length = ok;
 2738         }
 2739     }
 2740 
 2741     if (length * nv <= dset->n && dset->n % length == 0) {
 2742         /* suggests that at least one var has already been stacked */
 2743         bign = dset->n;
 2744         length -= offset;
 2745     } else {
 2746         /* no stacking done yet: need to expand series length */
 2747         bign = nv * (dset->n - offset);
 2748         length = 0;
 2749     }
 2750     }
 2751 
 2752 #if PDEBUG
 2753     fprintf(stderr, "bign = %d, allocating xstack (oldn = %d)\n", bign, dset->n);
 2754 #endif
 2755 
 2756     /* allocate container for stacked data */
 2757     xstack = malloc(bign * sizeof *xstack);
 2758     if (xstack == NULL) {
 2759     return E_ALLOC;
 2760     }
 2761 
 2762     /* extend length of all series? */
 2763     oldn = dset->n;
 2764     if (bign > oldn) {
 2765     err = dataset_add_observations(dset, bign - oldn, OPT_NONE);
 2766     if (err) {
 2767         return err;
 2768     }
 2769     }
 2770 
 2771     /* construct stacked series */
 2772     for (i=0; i<nv; i++) {
 2773     int j = list[i+1];
 2774     int t, bigt, tmax;
 2775 
 2776     if (length > 0) {
 2777         bigt = length * i;
 2778         tmax = offset + length;
 2779     } else {
 2780         bigt = oldn * i;
 2781         tmax = oldn;
 2782     }
 2783 
 2784     for (t=offset; t<tmax; t++) {
 2785         xstack[bigt] = dset->Z[j][t];
 2786         if (dset->S != NULL && bigt != t) {
 2787         strcpy(dset->S[bigt], dset->S[t]);
 2788         }
 2789         bigt++;
 2790     }
 2791 
 2792     if (i == nv - 1) {
 2793         for (t=bigt; t<bign; t++) {
 2794         xstack[bigt++] = NADBL;
 2795         }
 2796     }
 2797     }
 2798 
 2799     *pstack = xstack;
 2800 
 2801     return err;
 2802 }
 2803 
 2804 static int found_log_parent (const char *s, char *targ)
 2805 {
 2806     int len = gretl_namechar_spn(s);
 2807 
 2808     if (len < VNAMELEN && s[len] == ')') {
 2809     char fmt[8];
 2810 
 2811     sprintf(fmt, "%%%d[^)]", VNAMELEN-1);
 2812     sscanf(s, fmt, targ);
 2813     return 1;
 2814     }
 2815 
 2816     return 0;
 2817 }
 2818 
 2819 /**
 2820  * series_is_log:
 2821  * @dset: dataset information.
 2822  * @i: ID number of series.
 2823  * @parent: location to which to write the name of the
 2824  * "parent" variable if any.
 2825  *
 2826  * Tries to determine if the variable with ID number @i is
 2827  * the logarithm of some other variable.
 2828  *
 2829  * Returns: 1 if variable @i appears to be a log, else 0.
 2830  */
 2831 
 2832 int series_is_log (const DATASET *dset, int i, char *parent)
 2833 {
 2834     const char *s = series_get_label(dset, i);
 2835 
 2836     *parent = '\0';
 2837 
 2838     if (s != NULL && *s != '\0') {
 2839     char fmt[16];
 2840 
 2841     sprintf(fmt, "= log of %%%ds", VNAMELEN-1);
 2842 
 2843     if (sscanf(s, fmt, parent) == 1) {
 2844         return 1;
 2845     } else if (!strncmp(s, "log(", 4)) {
 2846         return found_log_parent(s + 4, parent);
 2847     } else {
 2848         s += strcspn(s, "=");
 2849         if (!strncmp(s, "=log(", 5)) {
 2850         return found_log_parent(s + 5, parent);
 2851         }
 2852     }
 2853     }
 2854 
 2855     return 0;
 2856 }
 2857 
 2858 /**
 2859  * series_set_discrete:
 2860  * @dset: pointer to data information struct.
 2861  * @i: index number of series.
 2862  * @s: non-zero to mark variable as discrete, zero to
 2863  * mark as not discrete.
 2864  *
 2865  * Mark a variable as being discrete or not.
 2866  */
 2867 
 2868 void series_set_discrete (DATASET *dset, int i, int s)
 2869 {
 2870     if (i > 0 && i < dset->v) {
 2871     int flags = dset->varinfo[i]->flags;
 2872 
 2873     if (s && !(flags & VAR_DISCRETE)) {
 2874         dset->varinfo[i]->flags |= VAR_DISCRETE;
 2875         set_dataset_is_changed();
 2876     } else if (!s && (flags & VAR_DISCRETE)) {
 2877         dset->varinfo[i]->flags &= ~VAR_DISCRETE;
 2878         set_dataset_is_changed();
 2879     }
 2880     }
 2881 }
 2882 
 2883 int series_record_label (DATASET *dset, int i,
 2884              const char *s)
 2885 {
 2886     char *targ = dset->varinfo[i]->label;
 2887 
 2888     if (labels_differ(targ, s)) {
 2889     copy_label(&dset->varinfo[i]->label, s);
 2890     set_dataset_is_changed();
 2891     }
 2892 
 2893     return 0;
 2894 }
 2895 
 2896 int series_record_display_name (DATASET *dset, int i,
 2897                 const char *s)
 2898 {
 2899     char *targ = dset->varinfo[i]->display_name;
 2900 
 2901     if (strcmp(targ, s)) {
 2902     *targ = '\0';
 2903     strncat(targ, s, MAXDISP - 1);
 2904     set_dataset_is_changed();
 2905     }
 2906 
 2907     return 0;
 2908 }
 2909 
 2910 const char *series_get_graph_name (const DATASET *dset, int i)
 2911 {
 2912     const char *ret = dset->varname[i];
 2913 
 2914     if (dset->varinfo != NULL && dset->varinfo[i] != NULL) {
 2915     if (dset->varinfo[i]->display_name[0] != '\0') {
 2916         ret = dset->varinfo[i]->display_name;
 2917     }
 2918     }
 2919 
 2920     return ret;
 2921 }
 2922 
 2923 static int add_obs (int n, DATASET *dset, gretlopt opt, PRN *prn)
 2924 {
 2925     int err = 0;
 2926 
 2927     if (complex_subsampled()) {
 2928     pprintf(prn, _("The data set is currently sub-sampled.\n"));
 2929     err = E_DATA;
 2930     } else if (n <= 0) {
 2931     err = E_PARSE;
 2932     } else if (opt & OPT_T) {
 2933     /* extending panel time */
 2934     err = panel_dataset_extend_time(dset, n);
 2935     if (!err) {
 2936         pprintf(prn, _("Panel time extended by %d observations"), n);
 2937         pputc(prn, '\n');
 2938     }
 2939     } else {
 2940     err = dataset_add_observations(dset, n, OPT_A);
 2941     if (!err) {
 2942         pprintf(prn, _("Dataset extended by %d observations"), n);
 2943         pputc(prn, '\n');
 2944         extend_function_sample_range(n);
 2945     }
 2946     }
 2947 
 2948     return err;
 2949 }
 2950 
 2951 static int insert_obs (int n, DATASET *dset, PRN *prn)
 2952 {
 2953     int err = 0;
 2954 
 2955     if (complex_subsampled()) {
 2956     pprintf(prn, _("The data set is currently sub-sampled.\n"));
 2957     err = E_DATA;
 2958     } else if (dataset_is_panel(dset)) {
 2959     err = E_PDWRONG;
 2960     } else if (n <= 0 || n > dset->n) {
 2961     err = E_DATA;
 2962     } else {
 2963     err = real_insert_observation(n - 1, dset);
 2964     }
 2965 
 2966     return err;
 2967 }
 2968 
 2969 int dataset_op_from_string (const char *s)
 2970 {
 2971     int op = DS_NONE;
 2972 
 2973     if (s == NULL || *s == '\0') {
 2974     return DS_NONE;
 2975     }
 2976 
 2977     if (!strcmp(s, "addobs")) {
 2978     op = DS_ADDOBS;
 2979     } else if (!strcmp(s, "compact")) {
 2980     op = DS_COMPACT;
 2981     } else if (!strcmp(s, "expand")) {
 2982     op = DS_EXPAND;
 2983     } else if (!strcmp(s, "transpose")) {
 2984     op = DS_TRANSPOSE;
 2985     } else if (!strcmp(s, "delete")) {
 2986     op = DS_DELETE;
 2987     } else if (!strcmp(s, "keep")) {
 2988     op = DS_KEEP;
 2989     } else if (!strcmp(s, "sortby")) {
 2990     op = DS_SORTBY;
 2991     } else if (!strcmp(s, "dsortby")) {
 2992     op = DS_DSORTBY;
 2993     } else if (!strcmp(s, "resample")) {
 2994     op = DS_RESAMPLE;
 2995     } else if (!strcmp(s, "restore")) {
 2996     op = DS_RESTORE;
 2997     } else if (!strcmp(s, "clear")) {
 2998     op = DS_CLEAR;
 2999     } else if (!strcmp(s, "renumber")) {
 3000     op = DS_RENUMBER;
 3001     } else if (!strcmp(s, "insobs")) {
 3002     op = DS_INSOBS;
 3003     } else if (!strcmp(s, "pad-daily")) {
 3004     op = DS_PAD_DAILY;
 3005     }
 3006 
 3007     return op;
 3008 }
 3009 
 3010 static int dataset_int_param (const char **ps, int op,
 3011                   DATASET *dset, int *err)
 3012 {
 3013     const char *s = *ps;
 3014     char test[32];
 3015     int k = 0;
 3016 
 3017     if ((op == DS_COMPACT || op == DS_EXPAND) &&
 3018     !dataset_is_time_series(dset)) {
 3019     *err = E_PDWRONG;
 3020     return 0;
 3021     }
 3022 
 3023     if (op == DS_PAD_DAILY && !dated_daily_data(dset)) {
 3024     *err = E_PDWRONG;
 3025     return 0;
 3026     }
 3027 
 3028     *test = '\0';
 3029     sscanf(s, "%31s", test);
 3030     *ps += strlen(test);
 3031 
 3032     k = gretl_int_from_string(test, err);
 3033     if (*err) {
 3034     return 0;
 3035     }
 3036 
 3037     if (k <= 0 || (op == DS_RESAMPLE && k < 1)) {
 3038     *err = E_DATA;
 3039     } else if (op == DS_INSOBS) {
 3040     if (k > dset->n) {
 3041         *err = E_DATA;
 3042     }
 3043     } else if (op == DS_COMPACT) {
 3044     int ok = 0;
 3045 
 3046     if (dset->pd == 12 && (k == 4 || k == 1)) {
 3047         ok = 1;
 3048     } else if (dset->pd == 4 && k == 1) {
 3049         ok = 1;
 3050     } else if (dset->pd == 52 && k == 12) {
 3051         ok = 1;
 3052     } else if (dated_daily_data(dset) && (k == 52 || k == 12)) {
 3053         ok = 1;
 3054     } else if (dataset_is_daily(dset) && k == 4) {
 3055         if (strstr(*ps, "spread")) {
 3056         ok = 1;
 3057         }
 3058     }
 3059 
 3060     if (!ok) {
 3061         *err = E_PDWRONG;
 3062         gretl_errmsg_set("This conversion is not supported");
 3063     }
 3064     } else if (op == DS_PAD_DAILY) {
 3065     if (k < 5 || k > 7 || k < dset->pd) {
 3066         *err = E_PDWRONG;
 3067         gretl_errmsg_set("This conversion is not supported");
 3068     }
 3069     }
 3070 
 3071     return k;
 3072 }
 3073 
 3074 static int compact_data_set_wrapper (const char *s, DATASET *dset,
 3075                      int k)
 3076 {
 3077     CompactMethod method = COMPACT_AVG;
 3078 
 3079     if (s != NULL) {
 3080     s += strspn(s, " ");
 3081     if (!strcmp(s, "sum")) {
 3082         method = COMPACT_SUM;
 3083     } else if (!strcmp(s, "first") || !strcmp(s, "sop")) {
 3084         method = COMPACT_SOP;
 3085     } else if (!strcmp(s, "last") || !strcmp(s, "eop")) {
 3086         method = COMPACT_EOP;
 3087     } else if (!strcmp(s, "spread")) {
 3088         method = COMPACT_SPREAD;
 3089     } else if (!strcmp(s, "avg") || !strcmp(s, "average")) {
 3090         method = COMPACT_AVG;
 3091     } else if (*s != '\0') {
 3092         return E_PARSE;
 3093     }
 3094     }
 3095 
 3096     return compact_data_set(dset, k, method, 0, 0);
 3097 }
 3098 
 3099 static unsigned int resample_seed;
 3100 
 3101 unsigned int get_resampling_seed (void)
 3102 {
 3103     return resample_seed;
 3104 }
 3105 
 3106 /* resample the dataset by observation, with replacement */
 3107 
 3108 int dataset_resample (DATASET *dset, int n, unsigned int seed)
 3109 {
 3110     DATASET *rset = NULL;
 3111     char **S = NULL;
 3112     unsigned int state;
 3113     int T = sample_size(dset);
 3114     int v = dset->v;
 3115     int i, j, s, t;
 3116     int err = 0;
 3117 
 3118     if (v < 2) {
 3119     return E_DATA;
 3120     }
 3121 
 3122     rset = datainfo_new();
 3123     if (rset == NULL) {
 3124     return E_ALLOC;
 3125     }
 3126 
 3127     rset->Z = malloc(v * sizeof *rset->Z);
 3128     if (rset->Z == NULL) {
 3129     free(rset);
 3130     return E_ALLOC;
 3131     }
 3132 
 3133     for (i=0; i<v; i++) {
 3134     rset->Z[i] = NULL;
 3135     }
 3136 
 3137     rset->v = v;
 3138 
 3139     j = 0;
 3140     for (i=0; i<dset->v && !err; i++) {
 3141     rset->Z[j] = malloc(n * sizeof **rset->Z);
 3142     if (rset->Z[j] == NULL) {
 3143         err = E_ALLOC;
 3144     } else if (i == 0) {
 3145         for (t=0; t<n; t++) {
 3146         rset->Z[j][t] = 1.0;
 3147         }
 3148     }
 3149     j++;
 3150     }
 3151 
 3152     if (err) {
 3153     goto bailout;
 3154     }
 3155 
 3156     if (dset->markers == REGULAR_MARKERS) {
 3157     S = strings_array_new_with_length(n, OBSLEN);
 3158     }
 3159 
 3160     if (seed > 0) {
 3161     resample_seed = seed;
 3162     gretl_rand_set_seed(seed);
 3163     } else {
 3164     resample_seed = gretl_rand_get_seed();
 3165     }
 3166 
 3167     state = gretl_rand_int();
 3168 
 3169     for (t=0; t<n; t++) {
 3170     s = gretl_rand_int_max(T) + dset->t1;
 3171     j = 1;
 3172     for (i=1; i<dset->v; i++) {
 3173         rset->Z[j][t] = dset->Z[i][s];
 3174         j++;
 3175     }
 3176     if (S != NULL) {
 3177         strcpy(S[t], dset->S[s]);
 3178     }
 3179     }
 3180 
 3181     if (S != NULL) {
 3182     rset->S = S;
 3183     rset->markers = REGULAR_MARKERS;
 3184     }
 3185 
 3186     rset->varname = dset->varname;
 3187     rset->varinfo = dset->varinfo;
 3188     rset->descrip = dset->descrip;
 3189 
 3190     rset->n = n;
 3191     rset->t1 = 0;
 3192     rset->t2 = n - 1;
 3193     dataset_obs_info_default(rset);
 3194 
 3195     set_dataset_resampled(rset, state);
 3196 
 3197  bailout:
 3198 
 3199     if (err) {
 3200     free_Z(rset);
 3201     clear_datainfo(rset, CLEAR_SUBSAMPLE);
 3202     free(rset);
 3203     } else {
 3204     backup_full_dataset(dset);
 3205     *dset = *rset;
 3206     free(rset);
 3207     }
 3208 
 3209     return err;
 3210 }
 3211 
 3212 /* note: @list should contain a single series ID, that of the
 3213    target series, and @param should hold a numeric string
 3214    giving the position to which @targ should be moved;
 3215    @fixmax is the greatest series ID number that cannot be
 3216    changed (based on saved models, etc., as determined by the
 3217    caller)
 3218 */
 3219 
 3220 int renumber_series_with_checks (const int *list,
 3221                  const char *param,
 3222                  int fixmax,
 3223                  DATASET *dset,
 3224                  PRN *prn)
 3225 {
 3226     char vname[VNAMELEN];
 3227     int v_old, v_new;
 3228     int f1, err = 0;
 3229 
 3230     if (list == NULL || list[0] != 1 ||
 3231     param == NULL || *param == '\0') {
 3232     return E_INVARG;
 3233     }
 3234 
 3235     if (sscanf(param, "%d", &v_new) != 1) {
 3236     return E_INVARG;
 3237     }
 3238 
 3239     v_old = list[1];
 3240 
 3241     if (v_old < 1 || v_old > dset->v - 1 ||
 3242     v_new < 1 || v_new > dset->v - 1) {
 3243     /* out of bounds */
 3244     return E_INVARG;
 3245     } else if (v_new == v_old) {
 3246     /* no-op */
 3247     return 0;
 3248     }
 3249 
 3250     f1 = max_varno_in_saved_lists();
 3251 
 3252     if (f1 > fixmax) {
 3253     fixmax = f1;
 3254     }
 3255 
 3256     strcpy(vname, dset->varname[v_old]);
 3257 
 3258     if (v_old <= fixmax) {
 3259     gretl_errmsg_sprintf(_("Variable %s cannot be renumbered"), vname);
 3260     err = E_DATA;
 3261     } else if (v_new <= fixmax) {
 3262     gretl_errmsg_sprintf(_("Target ID %d is not available"), v_new);
 3263     err = E_DATA;
 3264     } else {
 3265     err = dataset_renumber_variable(v_old, v_new, dset);
 3266     }
 3267 
 3268     if (!err && gretl_messages_on()) {
 3269     pprintf(prn, _("Renumbered %s as variable %d\n"),
 3270         vname, v_new);
 3271     maybe_list_series(dset, prn);
 3272     }
 3273 
 3274     return err;
 3275 }
 3276 
 3277 /* alternate forms:
 3278 
 3279            @op        @list  @param
 3280    dataset addobs            24
 3281    dataset compact           1
 3282    dataset compact           4 last
 3283    dataset expand            interpolate
 3284    dataset transpose
 3285    dataset sortby     x1
 3286    dataset resample          500
 3287    dataset clear
 3288    dataset renumber   orig   2
 3289    dataset insobs            13
 3290    dataset pad-daily         7
 3291 
 3292 */
 3293 
 3294 int modify_dataset (DATASET *dset, int op, const int *list,
 3295             const char *param, gretlopt opt, PRN *prn)
 3296 {
 3297     static int resampled;
 3298     int k = 0, err = 0;
 3299 
 3300     if (dset == NULL || dset->Z == NULL) {
 3301     return E_NODATA;
 3302     }
 3303 
 3304 #if 0
 3305     fprintf(stderr, "modify_dataset: op=%d, param='%s'\n", op, param);
 3306     printlist(list, "list");
 3307 #endif
 3308 
 3309     if (op == DS_CLEAR || op == DS_RENUMBER) {
 3310     /* must be handled by the calling program */
 3311     return E_NOTIMP;
 3312     }
 3313 
 3314     if (gretl_function_depth() > 0) {
 3315     if (op == DS_ADDOBS && !complex_subsampled() &&
 3316         dset->t2 == dset->n - 1 && !(opt & OPT_T)) {
 3317         /* experimental, 2015-07-28: allow "addobs" within a
 3318            function provided the dataset is not subsampled
 3319         */
 3320         goto proceed;
 3321     } else {
 3322         gretl_errmsg_set(_("The 'dataset' command is not available "
 3323                    "within functions"));
 3324         return 1;
 3325     }
 3326     }
 3327 
 3328     if (gretl_looping() && op != DS_RESAMPLE &&
 3329     op != DS_RESTORE && op != DS_SORTBY) {
 3330     pputs(prn, _("Sorry, this command is not available in loop mode\n"));
 3331     return 1;
 3332     }
 3333 
 3334     if (op == DS_RESAMPLE && resampled) {
 3335     /* repeated "resample": implicitly restore first */
 3336     err = restore_full_sample(dset, NULL);
 3337     if (err) {
 3338         return err;
 3339     } else {
 3340         resampled = 0;
 3341     }
 3342     }
 3343 
 3344     if (op != DS_RESTORE && complex_subsampled()) {
 3345     gretl_errmsg_set(_("The data set is currently sub-sampled"));
 3346     return 1;
 3347     }
 3348 
 3349  proceed:
 3350 
 3351     if (op == DS_ADDOBS || op == DS_INSOBS ||
 3352     op == DS_COMPACT || op == DS_RESAMPLE ||
 3353     op == DS_PAD_DAILY) {
 3354     if (param == NULL) {
 3355         err = E_ARGS;
 3356     } else {
 3357         k = dataset_int_param(&param, op, dset, &err);
 3358     }
 3359     if (err) {
 3360         return err;
 3361     }
 3362     } else if (op == DS_EXPAND) {
 3363     if (dset->pd == 1) {
 3364         k = 4;
 3365     } else if (dset->pd == 4) {
 3366         k = 12;
 3367     } else {
 3368         return E_PDWRONG;
 3369     }
 3370     }
 3371 
 3372     if (op == DS_ADDOBS) {
 3373     err = add_obs(k, dset, opt, prn);
 3374     } else if (op == DS_INSOBS) {
 3375     err = insert_obs(k, dset, prn);
 3376     } else if (op == DS_COMPACT) {
 3377     err = compact_data_set_wrapper(param, dset, k);
 3378     } else if (op == DS_EXPAND) {
 3379     int n = (param == NULL)? 0 : strlen(param);
 3380     int interp = 0;
 3381 
 3382     if (n > 0 && !strncmp(param, "interpolate", n)) {
 3383         interp = 1;
 3384     }
 3385     err = expand_data_set(dset, k, interp);
 3386     } else if (op == DS_PAD_DAILY) {
 3387     err = pad_daily_data(dset, k, prn);
 3388     } else if (op == DS_TRANSPOSE) {
 3389     err = transpose_data(dset);
 3390     } else if (op == DS_SORTBY) {
 3391     err = dataset_sort(dset, list, OPT_NONE);
 3392     } else if (op == DS_DSORTBY) {
 3393     err = dataset_sort(dset, list, OPT_D);
 3394     } else if (op == DS_RESAMPLE) {
 3395     err = dataset_resample(dset, k, 0);
 3396     if (!err) {
 3397         resampled = 1;
 3398     }
 3399     } else if (op == DS_RESTORE) {
 3400     if (resampled) {
 3401         err = restore_full_sample(dset, NULL);
 3402         resampled = 0;
 3403     } else {
 3404         pprintf(prn, _("dataset restore: dataset is not resampled\n"));
 3405         err = E_DATA;
 3406     }
 3407     } else if (op == DS_DELETE) {
 3408     pprintf(prn, "dataset delete: not ready yet\n");
 3409     } else if (op == DS_KEEP) {
 3410     pprintf(prn, "dataset keep: not ready yet\n");
 3411     } else {
 3412     err = E_PARSE;
 3413     }
 3414 
 3415     return err;
 3416 }
 3417 
 3418 int dataset_get_structure (const DATASET *dset)
 3419 {
 3420     if (dset == NULL || dset->n == 0) {
 3421     return DATA_NONE;
 3422     } else if (dataset_is_panel(dset)) {
 3423     return DATA_PANEL;
 3424     } else if (dataset_is_time_series(dset)) {
 3425     return DATA_TS;
 3426     } else {
 3427     return DATA_XSECT;
 3428     }
 3429 }
 3430 
 3431 /**
 3432  * panel_sample_size:
 3433  * @dset: pointer to data information struct.
 3434  *
 3435  * Returns: the numbers of units/individuals in the current
 3436  * sample range, or 0 if the dataset is not a panel.
 3437  */
 3438 
 3439 int panel_sample_size (const DATASET *dset)
 3440 {
 3441     int ret = 0;
 3442 
 3443     if (dataset_is_panel(dset)) {
 3444     ret = (dset->t2 - dset->t1 + 1) / dset->pd;
 3445     }
 3446 
 3447     return ret;
 3448 }
 3449 
 3450 /**
 3451  * multi_unit_panel_sample:
 3452  * @dset: pointer to dataset.
 3453  *
 3454  * Returns: 1 if the dataset is a panel and the current sample
 3455  * range includes two or more individuals, otherwise 0.
 3456  */
 3457 
 3458 int multi_unit_panel_sample (const DATASET *dset)
 3459 {
 3460     int ret = 0;
 3461 
 3462     if (dataset_is_panel(dset)) {
 3463     ret = (dset->t2 - dset->t1 + 1) > dset->pd;
 3464     }
 3465 
 3466     return ret;
 3467 }
 3468 
 3469 /**
 3470  * dataset_purge_missing_rows:
 3471  * @dset: pointer to dataset.
 3472  *
 3473  * Removes empty rows from the dataset -- that is, observations
 3474  * at which there are no non-missing values.  This is intended
 3475  * for daily data only.
 3476  *
 3477  * Returns: 0 on success, non-zero code on error.
 3478  */
 3479 
 3480 int dataset_purge_missing_rows (DATASET *dset)
 3481 {
 3482     int new_n, missrow, totmiss = 0;
 3483     int t1 = dset->t1;
 3484     int t2 = dset->t2;
 3485     char **S = NULL;
 3486     double *Zi = NULL;
 3487     size_t sz;
 3488     int i, t, s;
 3489     int err = 0;
 3490 
 3491     for (t=0; t<dset->n; t++) {
 3492     missrow = 1;
 3493     for (i=1; i<dset->v; i++) {
 3494         if (!na(dset->Z[i][t])) {
 3495         missrow = 0;
 3496         break;
 3497         }
 3498     }
 3499     if (missrow) {
 3500         totmiss++;
 3501         if (t < dset->t1) {
 3502         t1--;
 3503         }
 3504         if (t < dset->t2) {
 3505         t2--;
 3506         }
 3507     }
 3508     }
 3509 
 3510     if (totmiss == 0) {
 3511     /* no-op */
 3512     return 0;
 3513     }
 3514 
 3515     if (dated_daily_data(dset) && dset->S == NULL) {
 3516     err = dataset_allocate_obs_markers(dset);
 3517     if (!err) {
 3518         for (t=0; t<dset->n; t++) {
 3519         calendar_date_string(dset->S[t], t, dset);
 3520         }
 3521     }
 3522     }
 3523 
 3524     for (t=0; t<dset->n; t++) {
 3525     missrow = 1;
 3526     for (i=1; i<dset->v; i++) {
 3527         if (!na(dset->Z[i][t])) {
 3528         missrow = 0;
 3529         break;
 3530         }
 3531     }
 3532     if (missrow) {
 3533         sz = (dset->n - t) * sizeof **dset->Z;
 3534         for (i=1; i<dset->v; i++) {
 3535         memmove(dset->Z[i] + t, dset->Z[i] + t + 1, sz);
 3536         }
 3537         if (dset->S != NULL) {
 3538         free(dset->S[t]);
 3539         for (s=t; s<dset->n - 1; s++) {
 3540             dset->S[s] = dset->S[s+1];
 3541         }
 3542         }
 3543     }
 3544     }
 3545 
 3546     new_n = dset->n - totmiss;
 3547 
 3548     for (i=1; i<dset->v; i++) {
 3549     Zi = realloc(dset->Z[i], new_n * sizeof *Zi);
 3550     if (Zi == NULL) {
 3551         err = E_ALLOC;
 3552     } else {
 3553         dset->Z[i] = Zi;
 3554     }
 3555     }
 3556 
 3557     if (!err && dset->S != NULL) {
 3558     S = realloc(dset->S, new_n * sizeof *S);
 3559     if (S == NULL) {
 3560         err = E_ALLOC;
 3561     } else {
 3562         dset->S = S;
 3563         if (dated_daily_data(dset)) {
 3564         strcpy(dset->stobs, dset->S[0]);
 3565         strcpy(dset->endobs, dset->S[new_n-1]);
 3566         dset->sd0 = get_epoch_day(dset->stobs);
 3567         }
 3568     }
 3569     }
 3570 
 3571     dataset_set_nobs(dset, new_n);
 3572     dset->t1 = t1;
 3573     dset->t2 = t2;
 3574 
 3575     return err;
 3576 }
 3577 
 3578 /**
 3579  * dataset_set_time_series:
 3580  * @dset: pointer to dataset.
 3581  * @pd: time series annual frequency (1 for annual, 4
 3582  * for quarterly or 12 for monthly).
 3583  * @yr0: starting year.
 3584  * @minor0: starting "minor" period, 1-based (quarter or
 3585  * month).
 3586  *
 3587  * Sets time-series properties on @dset: frequency @pd with
 3588  * starting observation @yr0, @minor0. If the data are
 3589  * annual (@pd = 1) then @minor0 is ignored.
 3590 
 3591  * Returns: 0 on success, non-zero code on error.
 3592  */
 3593 
 3594 int dataset_set_time_series (DATASET *dset, int pd,
 3595                  int yr0, int minor0)
 3596 {
 3597     int err = 0;
 3598 
 3599     if (pd != 1 && pd != 4 && pd != 12) {
 3600     err = E_DATA;
 3601     } else if (yr0 < 1) {
 3602     err = E_DATA;
 3603     } else if (pd > 1 && (minor0 < 1 || minor0 > pd)) {
 3604     err = E_DATA;
 3605     } else {
 3606     gchar *stobs = NULL;
 3607 
 3608     dataset_destroy_obs_markers(dset);
 3609     dset->structure = TIME_SERIES;
 3610     dset->pd = pd;
 3611 
 3612     if (pd == 1) {
 3613         stobs = g_strdup_printf("%d", yr0);
 3614     } else if (pd == 4) {
 3615         stobs = g_strdup_printf("%d.%d", yr0, minor0);
 3616     } else {
 3617         stobs = g_strdup_printf("%d.%02d", yr0, minor0);
 3618     }
 3619 
 3620     dset->sd0 = dot_atof(stobs);
 3621     ntodate(dset->stobs, 0, dset);
 3622     ntodate(dset->endobs, dset->n - 1, dset);
 3623     g_free(stobs);
 3624     }
 3625 
 3626     return err;
 3627 }
 3628 
 3629 /**
 3630  * series_is_discrete:
 3631  * @dset: pointer to dataset.
 3632  * @i: index number of series.
 3633  *
 3634  * Returns: non-zero iff series @i should be treated as discrete.
 3635  */
 3636 
 3637 int series_is_discrete (const DATASET *dset, int i)
 3638 {
 3639     return dset->varinfo[i]->flags & VAR_DISCRETE;
 3640 }
 3641 
 3642 /**
 3643  * series_is_hidden:
 3644  * @dset: pointer to dataset.
 3645  * @i: index number of series.
 3646  *
 3647  * Returns: non-zero iff series @i is hidden.
 3648  */
 3649 
 3650 int series_is_hidden (const DATASET *dset, int i)
 3651 {
 3652     return dset->varinfo[i]->flags & VAR_HIDDEN;
 3653 }
 3654 
 3655 /**
 3656  * series_is_generated:
 3657  * @dset: pointer to dataset.
 3658  * @i: index number of series.
 3659  *
 3660  * Returns: non-zero iff series @i was generated using
 3661  * a formula or transformation function.
 3662  */
 3663 
 3664 int series_is_generated (const DATASET *dset, int i)
 3665 {
 3666     return dset->varinfo[i]->flags & VAR_GENERATED;
 3667 }
 3668 
 3669 /**
 3670  * series_is_listarg:
 3671  * @dset: pointer to dataset.
 3672  * @i: index number of series.
 3673  *
 3674  * Returns: non-zero iff series @i has been marked as
 3675  * belonging to a list argument to a function.
 3676  */
 3677 
 3678 int series_is_listarg (const DATASET *dset, int i)
 3679 {
 3680     return dset->varinfo[i]->flags & VAR_LISTARG;
 3681 }
 3682 
 3683 /**
 3684  * series_is_coded:
 3685  * @dset: pointer to dataset.
 3686  * @i: index number of series.
 3687  *
 3688  * Returns: non-zero iff series @i has been marked as
 3689  * "coded", meaning that its numerical values represent
 3690  * an arbitrary encoding of qualitative characteristics.
 3691  */
 3692 
 3693 int series_is_coded (const DATASET *dset, int i)
 3694 {
 3695     return dset->varinfo[i]->flags & VAR_CODED;
 3696 }
 3697 
 3698 /**
 3699  * series_is_integer_valued:
 3700  * @dset: pointer to dataset.
 3701  * @i: index number of series.
 3702  *
 3703  * Returns: non-zero iff all values in series @i are
 3704  * representable as integers (ignoring missing values).
 3705  */
 3706 
 3707 int series_is_integer_valued (const DATASET *dset, int i)
 3708 {
 3709     const double *x = dset->Z[i];
 3710     int t, n_ok = 0, ret = 1;
 3711 
 3712     for (t=0; t<dset->n; t++) {
 3713     if (!na(x[t])) {
 3714         n_ok++;
 3715         if (x[t] != floor(x[t])) {
 3716         ret = 0;
 3717         break;
 3718         } else if (x[t] > INT_MAX || x[t] < INT_MIN) {
 3719         ret = 0;
 3720         break;
 3721         }
 3722     }
 3723     }
 3724 
 3725     if (n_ok == 0) {
 3726     /* don't let an entirely missing series count as
 3727        "integer-valued"
 3728     */
 3729     ret = 0;
 3730     }
 3731 
 3732     return ret;
 3733 }
 3734 
 3735 /**
 3736  * series_set_flag:
 3737  * @dset: pointer to dataset.
 3738  * @i: index number of series.
 3739  * @flag: flag to set.
 3740  *
 3741  * Sets the given @flag on series @i.
 3742  */
 3743 
 3744 void series_set_flag (DATASET *dset, int i, VarFlags flag)
 3745 {
 3746     if (i > 0 && i < dset->v) {
 3747     dset->varinfo[i]->flags |= flag;
 3748     }
 3749 }
 3750 
 3751 /**
 3752  * series_unset_flag:
 3753  * @dset: pointer to dataset.
 3754  * @i: index number of series.
 3755  * @flag: flag to remove.
 3756  *
 3757  * Unsets the given @flag on series @i.
 3758  */
 3759 
 3760 void series_unset_flag (DATASET *dset, int i, VarFlags flag)
 3761 {
 3762     if (i > 0 && i < dset->v) {
 3763     dset->varinfo[i]->flags &= ~flag;
 3764     }
 3765 }
 3766 
 3767 /**
 3768  * series_get_flags:
 3769  * @dset: pointer to dataset.
 3770  * @i: index number of series.
 3771  *
 3772  * Returns: the flags set series @i.
 3773  */
 3774 
 3775 VarFlags series_get_flags (const DATASET *dset, int i)
 3776 {
 3777     if (i >= 0 && i < dset->v) {
 3778     return dset->varinfo[i]->flags;
 3779     } else {
 3780     return 0;
 3781     }
 3782 }
 3783 
 3784 /**
 3785  * series_zero_flags:
 3786  * @dset: pointer to dataset.
 3787  * @i: index number of series.
 3788  *
 3789  * Sets flags on series @i to zero.
 3790  */
 3791 
 3792 void series_zero_flags (DATASET *dset, int i)
 3793 {
 3794     if (i >= 0 && i < dset->v) {
 3795     dset->varinfo[i]->flags = 0;
 3796     }
 3797 }
 3798 
 3799 /**
 3800  * series_get_label:
 3801  * @dset: pointer to dataset.
 3802  * @i: index number of series.
 3803  *
 3804  * Returns: the descriptive label for series @i.
 3805  */
 3806 
 3807 const char *series_get_label (const DATASET *dset, int i)
 3808 {
 3809     if (i >= 0 && i < dset->v) {
 3810     return dset->varinfo[i]->label;
 3811     } else {
 3812     return NULL;
 3813     }
 3814 }
 3815 
 3816 /**
 3817  * series_get_display_name:
 3818  * @dset: pointer to dataset.
 3819  * @i: index number of series.
 3820  *
 3821  * Returns: the display name for series @i.
 3822  */
 3823 
 3824 const char *series_get_display_name (const DATASET *dset, int i)
 3825 {
 3826     if (i >= 0 && i < dset->v) {
 3827     return dset->varinfo[i]->display_name;
 3828     } else {
 3829     return NULL;
 3830     }
 3831 }
 3832 
 3833 /**
 3834  * series_get_parent_name:
 3835  * @dset: pointer to dataset.
 3836  * @i: index number of series.
 3837  *
 3838  * Returns: the name of the "parent" of series @i
 3839  * (e.g. if series @i is a lag of some other series)
 3840  * or NULL if the series has no parent.
 3841  */
 3842 
 3843 const char *series_get_parent_name (const DATASET *dset, int i)
 3844 {
 3845     if (i > 0 && i < dset->v) {
 3846     if (dset->varinfo[i]->parent[0] != '\0') {
 3847         return dset->varinfo[i]->parent;
 3848     }
 3849     }
 3850 
 3851     return NULL;
 3852 }
 3853 
 3854 /**
 3855  * series_get_parent_id:
 3856  * @dset: pointer to dataset.
 3857  * @i: index number of series.
 3858  *
 3859  * Returns: the ID number of the "parent" of series @i
 3860  * (e.g. if series @i is a lag of some other series)
 3861  * or -1 if the series has no parent.
 3862  */
 3863 
 3864 int series_get_parent_id (const DATASET *dset, int i)
 3865 {
 3866     if (i > 0 && i < dset->v) {
 3867     const char *pname = dset->varinfo[i]->parent;
 3868 
 3869     if (*pname != '\0') {
 3870         return current_series_index(dset, pname);
 3871     }
 3872     }
 3873 
 3874     return -1;
 3875 }
 3876 
 3877 int series_get_lag (const DATASET *dset, int i)
 3878 {
 3879     if (i > 0 && i < dset->v) {
 3880     return dset->varinfo[i]->lag;
 3881     } else {
 3882     return 0;
 3883     }
 3884 }
 3885 
 3886 int series_get_transform (const DATASET *dset, int i)
 3887 {
 3888     if (i > 0 && i < dset->v) {
 3889     return dset->varinfo[i]->transform;
 3890     } else {
 3891     return 0;
 3892     }
 3893 }
 3894 
 3895 /**
 3896  * series_get_compact_method:
 3897  * @dset: pointer to dataset.
 3898  * @i: index number of series.
 3899  *
 3900  * Returns: the compaction method set for series @i.
 3901  */
 3902 
 3903 int series_get_compact_method (const DATASET *dset, int i)
 3904 {
 3905     if (i > 0 && i < dset->v) {
 3906     return dset->varinfo[i]->compact_method;
 3907     } else {
 3908     return 0;
 3909     }
 3910 }
 3911 
 3912 /**
 3913  * series_get_stack_level:
 3914  * @dset: pointer to dataset.
 3915  * @i: index number of series.
 3916  *
 3917  * Returns: the stack level of series @i.
 3918  */
 3919 
 3920 int series_get_stack_level (const DATASET *dset, int i)
 3921 {
 3922     if (i >= 0 && i < dset->v) {
 3923     return dset->varinfo[i]->stack_level;
 3924     } else {
 3925     return 0;
 3926     }
 3927 }
 3928 
 3929 void series_set_mtime (DATASET *dset, int i)
 3930 {
 3931     if (i > 0 && i < dset->v) {
 3932     dset->varinfo[i]->mtime = gretl_monotonic_time();
 3933     }
 3934 }
 3935 
 3936 gint64 series_get_mtime (const DATASET *dset, int i)
 3937 {
 3938     if (i > 0 && i < dset->v) {
 3939     return dset->varinfo[i]->mtime;
 3940     } else {
 3941     return 0;
 3942     }
 3943 }
 3944 
 3945 void series_set_label (DATASET *dset, int i,
 3946                const char *s)
 3947 {
 3948     if (i > 0 && i < dset->v) {
 3949     copy_label(&dset->varinfo[i]->label, s);
 3950     }
 3951 }
 3952 
 3953 void series_set_display_name (DATASET *dset, int i,
 3954                   const char *s)
 3955 {
 3956     if (i > 0 && i < dset->v) {
 3957     char *targ = dset->varinfo[i]->display_name;
 3958 
 3959     *targ = '\0';
 3960     strncat(targ, s, MAXDISP-1);
 3961     }
 3962 }
 3963 
 3964 void series_set_compact_method (DATASET *dset, int i,
 3965                 int method)
 3966 {
 3967     if (i > 0 && i < dset->v) {
 3968     dset->varinfo[i]->compact_method = method;
 3969     }
 3970 }
 3971 
 3972 void series_set_parent (DATASET *dset, int i,
 3973             const char *parent)
 3974 {
 3975     if (i > 0 && i < dset->v) {
 3976     strcpy(dset->varinfo[i]->parent, parent);
 3977     }
 3978 }
 3979 
 3980 void series_set_transform (DATASET *dset, int i,
 3981                int transform)
 3982 {
 3983     if (i > 0 && i < dset->v) {
 3984     dset->varinfo[i]->transform = transform;
 3985     }
 3986 }
 3987 
 3988 void series_set_lag (DATASET *dset, int i, int lag)
 3989 {
 3990     if (i > 0 && i < dset->v) {
 3991     dset->varinfo[i]->lag = lag;
 3992     }
 3993 }
 3994 
 3995 void series_set_stack_level (DATASET *dset, int i, int level)
 3996 {
 3997     if (i > 0 && i < dset->v) {
 3998     dset->varinfo[i]->stack_level = level;
 3999     }
 4000 }
 4001 
 4002 void series_increment_stack_level (DATASET *dset, int i)
 4003 {
 4004     if (i > 0 && i < dset->v) {
 4005     dset->varinfo[i]->stack_level += 1;
 4006     }
 4007 }
 4008 
 4009 void series_decrement_stack_level (DATASET *dset, int i)
 4010 {
 4011     if (i > 0 && i < dset->v) {
 4012     dset->varinfo[i]->stack_level -= 1;
 4013     }
 4014 }
 4015 
 4016 void series_delete_metadata (DATASET *dset, int i)
 4017 {
 4018     if (i > 0 && i < dset->v &&
 4019     dset->varinfo != NULL &&
 4020     dset->varinfo[i] != NULL) {
 4021     dset->varinfo[i]->lag = 0;
 4022     dset->varinfo[i]->transform = 0;
 4023     dset->varinfo[i]->parent[0] = '\0';
 4024     }
 4025 }
 4026 
 4027 void series_ensure_level_zero (DATASET *dset)
 4028 {
 4029     if (dset != NULL) {
 4030     int i, n = 0;
 4031 
 4032     for (i=1; i<dset->v; i++) {
 4033         if (dset->varinfo[i]->stack_level > 0) {
 4034         dset->varinfo[i]->stack_level = 0;
 4035         n++;
 4036         }
 4037     }
 4038 #if 0
 4039     if (n > 0) {
 4040         fprintf(stderr, "Unauthorized access to series detected!\n");
 4041     }
 4042 #endif
 4043     }
 4044 }
 4045 
 4046 void series_attach_string_table (DATASET *dset, int i, void *ptr)
 4047 {
 4048     if (dset != NULL && i > 0 && i < dset->v) {
 4049     series_set_discrete(dset, i, 1);
 4050     dset->varinfo[i]->st = ptr;
 4051     }
 4052 }
 4053 
 4054 void series_destroy_string_table (DATASET *dset, int i)
 4055 {
 4056     if (dset != NULL && i > 0 && i < dset->v) {
 4057     series_table_destroy(dset->varinfo[i]->st);
 4058     dset->varinfo[i]->st = NULL;
 4059     }
 4060 }
 4061 
 4062 /**
 4063  * is_string_valued:
 4064  * @dset: pointer to dataset.
 4065  * @i: index number of series.
 4066  *
 4067  * Returns: 1 if series @i has a table of string values
 4068  * (that is, a mapping from numerical values to associated
 4069  * string values), otherwise 0.
 4070  */
 4071 
 4072 int is_string_valued (const DATASET *dset, int i)
 4073 {
 4074     if (dset != NULL && i > 0 && i < dset->v) {
 4075     return dset->varinfo[i]->st != NULL;
 4076     } else {
 4077     return 0;
 4078     }
 4079 }
 4080 
 4081 /**
 4082  * series_get_string_table:
 4083  * @dset: pointer to dataset.
 4084  * @i: index number of series.
 4085  *
 4086  * Returns: the string table attched to series @i or NULL if
 4087  * there is no such table.
 4088  */
 4089 
 4090 series_table *series_get_string_table (const DATASET *dset, int i)
 4091 {
 4092     if (dset != NULL && i > 0 && i < dset->v) {
 4093     return dset->varinfo[i]->st;
 4094     } else {
 4095     return NULL;
 4096     }
 4097 }
 4098 
 4099 /**
 4100  * series_get_string_for_obs:
 4101  * @dset: pointer to dataset.
 4102  * @i: index number of series.
 4103  * @t: 0-based index of observation.
 4104  *
 4105  * Returns: the string associated with the numerical value of
 4106  * series @i at observation @t, or NULL if there is no such string.
 4107  */
 4108 
 4109 const char *series_get_string_for_obs (const DATASET *dset, int i,
 4110                        int t)
 4111 {
 4112     const char *ret = NULL;
 4113 
 4114     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4115     ret = series_table_get_string(dset->varinfo[i]->st,
 4116                       dset->Z[i][t]);
 4117     }
 4118 
 4119     return ret;
 4120 }
 4121 
 4122 /**
 4123  * series_get_string_for_value:
 4124  * @dset: pointer to dataset.
 4125  * @i: index number of series.
 4126  * @val: the value to look up.
 4127  *
 4128  * Returns: the string associated with numerical value @val of
 4129  * series @i, or NULL if there is no such string.
 4130  */
 4131 
 4132 const char *series_get_string_for_value (const DATASET *dset, int i,
 4133                      double val)
 4134 {
 4135     const char *ret = NULL;
 4136 
 4137     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4138     ret = series_table_get_string(dset->varinfo[i]->st, val);
 4139     }
 4140 
 4141     return ret;
 4142 }
 4143 
 4144 /**
 4145  * series_set_string_val:
 4146  * @dset: pointer to dataset.
 4147  * @i: index number of series.
 4148  * @t: 0-based index of observation.
 4149  * @s: the string value to set.
 4150  *
 4151  * Attempts to set the string value for observation @t of series @i
 4152  * to @s. This will fail if the series in question does not have
 4153  * an associated table of string values.
 4154  *
 4155  * Returns: 0 on success, non-zero code on error.
 4156  */
 4157 
 4158 int series_set_string_val (DATASET *dset, int i, int t, const char *s)
 4159 {
 4160     int err = 0;
 4161 
 4162     if (i <= 0 || i >= dset->v) {
 4163     err = E_DATA;
 4164     } else if (dset->varinfo[i]->st == NULL) {
 4165     err = E_TYPES;
 4166     } else {
 4167     series_table *st = dset->varinfo[i]->st;
 4168     double x = series_table_get_value(st, s);
 4169 
 4170     if (na(x)) {
 4171         int k = series_table_add_string(st, s);
 4172 
 4173         if (k < 0) {
 4174         err = E_ALLOC;
 4175         } else {
 4176         dset->Z[i][t] = k;
 4177         }
 4178     } else {
 4179         dset->Z[i][t] = x;
 4180     }
 4181     }
 4182 
 4183     return err;
 4184 }
 4185 
 4186 /**
 4187  * string_series_assign_value:
 4188  * @dset: pointer to dataset.
 4189  * @i: index number of string-valued series.
 4190  * @t: 0-based index of observation.
 4191  * @x: the numeric value to set.
 4192  *
 4193  * Attempts to set the value for observation @t of series @i
 4194  * to @x. This will fail if the @x falls outside of the range
 4195  * of values supported by the string table for the series.
 4196  *
 4197  * Returns: 0 on success, non-zero code on error.
 4198  */
 4199 
 4200 int string_series_assign_value (DATASET *dset, int i,
 4201                 int t, double x)
 4202 {
 4203     series_table *st = NULL;
 4204     int err = 0;
 4205 
 4206     if (i <= 0 || i >= dset->v) {
 4207     err = E_DATA;
 4208     } else if (na(x)) {
 4209     dset->Z[i][t] = x;
 4210     } else if (x != floor(x)) {
 4211     err = E_TYPES;
 4212     } else if ((st = dset->varinfo[i]->st) == NULL) {
 4213     err = E_TYPES;
 4214     } else if (series_table_get_string(st, x) == NULL) {
 4215     err = E_DATA;
 4216     } else {
 4217     dset->Z[i][t] = x;
 4218     }
 4219 
 4220     return err;
 4221 }
 4222 
 4223 /**
 4224  * series_decode_string:
 4225  * @dset: pointer to dataset.
 4226  * @i: index number of series.
 4227  * @s: string to decode.
 4228  *
 4229  * Returns: the numerical value associated with the string
 4230  * @s for series @i, or #NADBL if there's no such value.
 4231  */
 4232 
 4233 double series_decode_string (const DATASET *dset, int i, const char *s)
 4234 {
 4235     double ret = NADBL;
 4236 
 4237     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4238     ret = series_table_get_value(dset->varinfo[i]->st, s);
 4239     }
 4240 
 4241     return ret;
 4242 }
 4243 
 4244 /**
 4245  * series_get_string_vals:
 4246  * @dset: pointer to dataset.
 4247  * @i: index number of series.
 4248  * @n_strs: location to receive the number of strings, or NULL.
 4249  *
 4250  * Returns: the array of strings associated with distinct numerical
 4251  * values of series @i, or NULL if there's no such array. The returned
 4252  * array should not be modified in any way; copy the strings first if
 4253  * you need to modify them.
 4254  */
 4255 
 4256 char **series_get_string_vals (const DATASET *dset, int i,
 4257                    int *n_strs, int subsample)
 4258 {
 4259     char **strs = NULL;
 4260     int n = 0;
 4261 
 4262     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4263     strs = series_table_get_strings(dset->varinfo[i]->st, &n);
 4264     }
 4265 
 4266     if (strs != NULL && subsample && dataset_is_subsampled(dset)) {
 4267     static char **substrs = NULL;
 4268     const double *x = dset->Z[i] + dset->t1;
 4269     int T = dset->t2 - dset->t1 + 1;
 4270     gretl_matrix *valid;
 4271     int err = 0;
 4272 
 4273     if (substrs != NULL) {
 4274         free(substrs);
 4275         substrs = NULL;
 4276     }
 4277     valid = gretl_matrix_values(x, T, OPT_NONE, &err);
 4278     if (err) {
 4279         strs = NULL;
 4280         n = 0;
 4281     } else {
 4282         int j, k, nv = valid->rows;
 4283 
 4284         substrs = strings_array_new(nv);
 4285         for (j=0; j<nv; j++) {
 4286         k = gretl_vector_get(valid, j) - 1;
 4287         substrs[j] = strs[k];
 4288         }
 4289         strs = substrs;
 4290         n = nv;
 4291         gretl_matrix_free(valid);
 4292     }
 4293     }
 4294 
 4295     if (n_strs != NULL) {
 4296     *n_strs = n;
 4297     }
 4298 
 4299     return strs;
 4300 }
 4301 
 4302 /**
 4303  * series_get_string_width:
 4304  * @dset: pointer to dataset.
 4305  * @i: index number of series.
 4306  *
 4307  * Returns: the maximum of (a) the number of characters in the
 4308  * name of series @i and (b) the number of bytes in the longest
 4309  * "string value" attached to series @i, if applicable; or 0
 4310  * if @i is not a valid series index.
 4311  */
 4312 
 4313 int series_get_string_width (const DATASET *dset, int i)
 4314 {
 4315     int n = 0;
 4316 
 4317     if (i > 0 && i < dset->v) {
 4318     n = strlen(dset->varname[i]);
 4319     if (dset->varinfo[i]->st != NULL) {
 4320         char **S;
 4321         int j, ns, m;
 4322 
 4323         S = series_table_get_strings(dset->varinfo[i]->st, &ns);
 4324         for (j=0; j<ns; j++) {
 4325         m = g_utf8_strlen(S[j], -1);
 4326         if (m > n) {
 4327             n = m;
 4328         }
 4329         }
 4330     }
 4331     }
 4332 
 4333     return n;
 4334 }
 4335 
 4336 /**
 4337  * steal_string_table:
 4338  * @l_dset: pointer to recipient dataset.
 4339  * @lvar: index number of target series.
 4340  * @r_dset: pointer to donor dataset.
 4341  * @rvar: index number of source series.
 4342  *
 4343  * Detaches the string table from @rvar in @r_dset and attaches it
 4344  * to @lvar in @l_dset,
 4345  *
 4346  * Returns: 0 on success, non-zero code on error.
 4347  */
 4348 
 4349 int steal_string_table (DATASET *l_dset, int lvar,
 4350             DATASET *r_dset, int rvar)
 4351 {
 4352     if (l_dset != r_dset || lvar != rvar) {
 4353     l_dset->varinfo[lvar]->st = r_dset->varinfo[rvar]->st;
 4354     r_dset->varinfo[rvar]->st = NULL;
 4355     series_set_discrete(l_dset, lvar, 1);
 4356     }
 4357 
 4358     return 0;
 4359 }
 4360 
 4361 /**
 4362  * merge_string_tables:
 4363  * @l_dset: pointer to current dataset.
 4364  * @lvar: index number of target series.
 4365  * @r_dset: pointer to dataset to be appended.
 4366  * @rvar: index number of source series.
 4367  *
 4368  * Translates the encoding of the string-values of series @rvar
 4369  * in @r_dset to that of series @lvar in @l_dset, adding extra
 4370  * strings as needed.
 4371  *
 4372  * Returns: 0 on success, non-zero code on error.
 4373  */
 4374 
 4375 int merge_string_tables (DATASET *l_dset, int lvar,
 4376              DATASET *r_dset, int rvar)
 4377 {
 4378     series_table *lst = l_dset->varinfo[lvar]->st;
 4379     double dx, *x = r_dset->Z[rvar];
 4380     const char *sr;
 4381     int t, idx, err = 0;
 4382 
 4383     for (t=0; t<r_dset->n && !err; t++) {
 4384     if (na(x[t])) {
 4385         continue;
 4386     }
 4387     /* get the right-hand side string associated with x[t] */
 4388     sr = series_get_string_for_value(r_dset, rvar, x[t]);
 4389     /* and look up its numeric code on the left */
 4390     dx = series_decode_string(l_dset, lvar, sr);
 4391     if (!na(dx)) {
 4392         /* got a match: apply the code from @lst */
 4393         x[t] = dx;
 4394     } else {
 4395         /* no match: we need to add a string to @lst */
 4396         idx = series_table_add_string(lst, sr);
 4397         if (idx < 0) {
 4398         err = E_ALLOC;
 4399         } else {
 4400         x[t] = (double) idx;
 4401         }
 4402     }
 4403     }
 4404 
 4405     return err;
 4406 }
 4407 
 4408 static void maybe_adjust_label (DATASET *dset, int v,
 4409                 char **S, int ns)
 4410 {
 4411     int i, len = 3 * ns; /* "=" + ", " */
 4412     char *tmp;
 4413 
 4414     for (i=0; i<ns; i++) {
 4415     len += strlen(S[i]) + 1 + floor(log10(1.0 + i));
 4416     }
 4417 
 4418     /* let's not create a super-long series label */
 4419     if (len > 255) {
 4420     return;
 4421     }
 4422 
 4423     tmp = calloc(len + 1, 1);
 4424 
 4425     if (tmp != NULL) {
 4426     char bit[16];
 4427 
 4428     for (i=0; i<ns; i++) {
 4429         sprintf(bit, "%d=", i+1);
 4430         strcat(tmp, bit);
 4431         strcat(tmp, S[i]);
 4432         if (i < ns - 1) {
 4433         strcat(tmp, ", ");
 4434         }
 4435     }
 4436     copy_label(&dset->varinfo[v]->label, tmp);
 4437     free(tmp);
 4438     }
 4439 }
 4440 
 4441 /* Encode the strings in @a into numerical values in series
 4442    @v of dataset @dset. "Return" via @pU the array of unique
 4443    string values and via @pnu the number of such values.
 4444 */
 4445 
 4446 static int alt_set_strvals (DATASET *dset, int v, gretl_array *a,
 4447                 char ***pU, int *pnu)
 4448 {
 4449     char **S, **U = NULL;
 4450     double *x = dset->Z[v];
 4451     int i, pos, ns, nu = 0;
 4452     int err = 0;
 4453 
 4454     S = gretl_array_get_strings(a, &ns);
 4455 
 4456     for (i=0; i<ns && !err; i++) {
 4457     err = strings_array_add_uniq(&U, &nu, S[i], &pos);
 4458     if (!err) {
 4459         x[i] = pos + 1;
 4460     }
 4461     }
 4462 
 4463     if (!err) {
 4464     *pU = U;
 4465     *pnu = nu;
 4466     } else if (U != NULL) {
 4467     strings_array_free(U, nu);
 4468     }
 4469 
 4470     return err;
 4471 }
 4472 
 4473 /* Recognize the case where we have an "empty" series
 4474    and an array of strings of full dataset length.
 4475 */
 4476 
 4477 static int alt_strvals_case (DATASET *dset, int v, gretl_array *a)
 4478 {
 4479     double *x = dset->Z[v];
 4480     double x0 = dset->Z[v][0];
 4481     int i, xconst = 1;
 4482 
 4483     for (i=1; i<dset->n && xconst; i++) {
 4484     if (na(x0)) {
 4485         if (!na(x[i])) {
 4486         xconst = 0;
 4487         }
 4488     } else if (x[i] != x0) {
 4489         xconst = 0;
 4490     }
 4491     }
 4492 
 4493     return xconst && gretl_array_get_length(a) == dset->n;
 4494 }
 4495 
 4496 /* here we're trying to set strings values on a series from
 4497    scratch */
 4498 
 4499 int series_set_string_vals (DATASET *dset, int i, void *ptr)
 4500 {
 4501     gretl_array *a = ptr;
 4502     gretl_matrix *vals = NULL;
 4503     char **S = NULL;
 4504     int ns = 0;
 4505     int err = 0;
 4506 
 4507     if (a == NULL || dset == NULL || i < 1 || i >= dset->v) {
 4508     return E_DATA;
 4509     }
 4510 
 4511     if (alt_strvals_case(dset, i, a)) {
 4512     err = alt_set_strvals(dset, i, a, &S, &ns);
 4513     if (err) {
 4514         return err;
 4515     } else {
 4516         goto do_strtable;
 4517     }
 4518     }
 4519 
 4520     /* get sorted vector of unique values */
 4521     vals = gretl_matrix_values(dset->Z[i], dset->n, OPT_S, &err);
 4522 
 4523     if (!err) {
 4524     int i, nvals = gretl_vector_get_length(vals);
 4525     double x0 = gretl_vector_get(vals, 0);
 4526     double x1 = gretl_vector_get(vals, nvals - 1);
 4527 
 4528     if (x0 < 1.0) {
 4529         gretl_errmsg_set("The minimum value of the target series "
 4530                  "must be >= 1");
 4531         err = E_DATA;
 4532     } else {
 4533         /* the values should all be integers */
 4534         for (i=0; i<nvals && !err; i++) {
 4535         x1 = gretl_vector_get(vals, i);
 4536         if (x1 != floor(x1)) {
 4537             gretl_errmsg_set("The series values must be integers");
 4538             err = E_DATA;
 4539         }
 4540         }
 4541     }
 4542 
 4543     if (!err) {
 4544         S = gretl_array_get_stringify_strings(a, (int) x1, &ns, &err);
 4545     }
 4546 
 4547     if (!err) {
 4548         /* the strings should all be UTF-8 */
 4549         for (i=0; i<ns && !err; i++) {
 4550         if (!g_utf8_validate(S[i], -1, NULL)) {
 4551             gretl_errmsg_sprintf("String %d is not valid UTF-8", i+1);
 4552             err = E_DATA;
 4553         }
 4554         }
 4555     }
 4556     }
 4557 
 4558  do_strtable:
 4559 
 4560     if (!err) {
 4561     series_table *st = series_table_new(S, ns);
 4562 
 4563     if (st == NULL) {
 4564         err = E_ALLOC;
 4565     } else {
 4566         if (dset->varinfo[i]->st != NULL) {
 4567         /* remove any pre-existing table */
 4568         series_table_destroy(dset->varinfo[i]->st);
 4569         }
 4570         series_set_discrete(dset, i, 1);
 4571         dset->varinfo[i]->st = st;
 4572         maybe_adjust_label(dset, i, S, ns);
 4573     }
 4574     }
 4575 
 4576     if (err && S != NULL && ns > 0) {
 4577     strings_array_free(S, ns);
 4578     }
 4579 
 4580     gretl_matrix_free(vals);
 4581 
 4582     return err;
 4583 }
 4584 
 4585 int set_panel_groups_name (DATASET *dset, const char *vname)
 4586 {
 4587     if (dset->pangrps != NULL) {
 4588     free(dset->pangrps);
 4589     }
 4590 
 4591     dset->pangrps = gretl_strdup(vname);
 4592 
 4593     return (dset->pangrps == NULL)? E_ALLOC : 0;
 4594 }
 4595 
 4596 /* This should be called only after the "group names"
 4597    property of @dset has been (recently) validated, via
 4598    panel_group_names_ok().
 4599 */
 4600 
 4601 const char *get_panel_group_name (const DATASET *dset, int obs)
 4602 {
 4603     const char *s = NULL;
 4604 
 4605     if (dataset_is_panel(dset) && dset->pangrps != NULL &&
 4606     obs >= 0 && obs < dset->n) {
 4607     int v = current_series_index(dset, dset->pangrps);
 4608     series_table *st;
 4609 
 4610     if ((st = series_get_string_table(dset, v)) != NULL) {
 4611         s = series_table_get_string(st, dset->Z[v][obs]);
 4612     }
 4613     }
 4614 
 4615     return (s != NULL)? s : "??";
 4616 }
 4617 
 4618 int panel_group_names_ok (const DATASET *dset, int maxlen)
 4619 {
 4620     int ok = 0;
 4621 
 4622     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4623     int ns, v = current_series_index(dset, dset->pangrps);
 4624 
 4625     if (v > 0 && v < dset->v) {
 4626         char **S = series_get_string_vals(dset, v, &ns, 0);
 4627 
 4628         if (S != NULL && ns >= dset->n / dset->pd) {
 4629         ok = 1; /* provisional */
 4630         if (maxlen > 0) {
 4631             int i;
 4632 
 4633             for (i=0; i<ns; i++) {
 4634             if (strlen(S[i]) > maxlen) {
 4635                 ok = 0;
 4636                 break;
 4637             }
 4638             }
 4639         }
 4640         }
 4641     }
 4642     }
 4643 
 4644     return ok;
 4645 }
 4646 
 4647 const char *panel_group_names_varname (const DATASET *dset)
 4648 {
 4649     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4650     int ns, v = current_series_index(dset, dset->pangrps);
 4651 
 4652     if (v > 0 && v < dset->v) {
 4653         char **S = series_get_string_vals(dset, v, &ns, 0);
 4654 
 4655         if (S != NULL) {
 4656         int ng = dset->n / dset->pd;
 4657 
 4658         if (ns >= ng) {
 4659             return dset->pangrps;
 4660         }
 4661         }
 4662     }
 4663     }
 4664 
 4665     return NULL;
 4666 }
 4667 
 4668 int is_panel_group_names_series (const DATASET *dset, int v)
 4669 {
 4670     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4671     return v == current_series_index(dset, dset->pangrps);
 4672     } else {
 4673     return 0;
 4674     }
 4675 }
 4676 
 4677 static int suitable_group_names_series (const DATASET *dset,
 4678                     int maxlen,
 4679                     int exclude)
 4680 {
 4681     int i, vfound = 0;
 4682 
 4683     for (i=1; i<dset->v && !vfound; i++) {
 4684     if (i == exclude) {
 4685         continue;
 4686     }
 4687     if (is_string_valued(dset, i)) {
 4688         int ns = 0;
 4689         char **S = series_get_string_vals(dset, i, &ns, 0);
 4690 
 4691         if (S != NULL && ns >= dset->n / dset->pd) {
 4692         const char *sbak = NULL;
 4693         int t, u, ubak = -1;
 4694         int fail = 0;
 4695 
 4696         for (t=dset->t1; t<=dset->t2 && !fail; t++) {
 4697             const char *st = series_get_string_for_obs(dset, i, t);
 4698 
 4699             u = t / dset->pd;
 4700             if (u == ubak && strcmp(st, sbak)) {
 4701             /* same unit, different label: no */
 4702             fail = 1;
 4703             } else if (ubak >= 0 && u != ubak && !strcmp(st, sbak)) {
 4704             /* different unit, same label: no */
 4705             fail = 2;
 4706             }
 4707             if (!fail && maxlen > 0 && strlen(st) > maxlen) {
 4708             fail = 1;
 4709             }
 4710             ubak = u;
 4711             sbak = st;
 4712         }
 4713         if (!fail) {
 4714             vfound = i;
 4715         }
 4716         }
 4717     }
 4718     }
 4719 
 4720     return vfound;
 4721 }
 4722 
 4723 /* For plotting purposes, try to get labels for panel groups,
 4724    subject to the constraint that they should be no longer
 4725    than @maxlen. If successful, this will return an array of
 4726    at least N strings, where N is the cross-sectional
 4727    dimension of the panel. This array should be treated as
 4728    read-only.
 4729 */
 4730 
 4731 series_table *get_panel_group_table (const DATASET *dset,
 4732                      int maxlen, int *pv)
 4733 {
 4734     series_table *st = NULL;
 4735     int vpg = 0;
 4736 
 4737     if (dset->pangrps != NULL) {
 4738     vpg = current_series_index(dset, dset->pangrps);
 4739     }
 4740 
 4741     /* first see if we have valid group labels set explicitly */
 4742     if (vpg > 0 && panel_group_names_ok(dset, maxlen)) {
 4743     st = dset->varinfo[vpg]->st;
 4744     }
 4745 
 4746     if (st == NULL) {
 4747     /* can we find a suitable string-valued series? */
 4748     int altv = suitable_group_names_series(dset, maxlen, vpg);
 4749 
 4750     if (altv > 0) {
 4751         vpg = altv;
 4752         st = dset->varinfo[vpg]->st;
 4753     }
 4754     }
 4755 
 4756     *pv = (st != NULL)? vpg : 0;
 4757 
 4758     return st;
 4759 }
 4760 
 4761 int is_dataset_series (const DATASET *dset, const double *x)
 4762 {
 4763     int i;
 4764 
 4765     for (i=dset->v-1; i>=0; i--) {
 4766     if (x == dset->Z[i]) {
 4767         return 1;
 4768     }
 4769     }
 4770 
 4771     return 0;
 4772 }
 4773 
 4774 static int effective_daily_skip (int delta, int wd, int pd)
 4775 {
 4776     int k, skip = delta - 1;
 4777 
 4778     if (pd < 7) {
 4779     skip = 0;
 4780     for (k=1; k<delta; k++) {
 4781         wd = (wd == 0)? 6 : wd - 1;
 4782         if (pd == 6) {
 4783         skip += (wd != 0);
 4784         } else {
 4785         skip += (wd != 0 && wd != 6);
 4786         }
 4787     }
 4788     }
 4789 
 4790     return skip;
 4791 }
 4792 
 4793 /* If we get here we've already checked that @dset is dated daily
 4794    data, and that @pd is a valid daily periodicity greater than or
 4795    equal to the current dset->pd.
 4796 */
 4797 
 4798 static int pad_daily_data (DATASET *dset, int pd, PRN *prn)
 4799 {
 4800     DATASET *bigset = NULL;
 4801     char datestr[OBSLEN];
 4802     guint32 ed, ed0 = 0, edbak = 0;
 4803     int wd, skip, totskip = 0;
 4804     int t, err = 0;
 4805 
 4806     for (t=0; t<dset->n; t++) {
 4807     ntodate(datestr, t, dset);
 4808     if (t == 0) {
 4809         ed0 = edbak = get_epoch_day(datestr);
 4810     } else {
 4811         wd = weekday_from_date(datestr);
 4812         ed = get_epoch_day(datestr);
 4813         skip = effective_daily_skip(ed - edbak, wd, pd);
 4814         totskip += skip;
 4815         edbak = ed;
 4816     }
 4817     }
 4818 
 4819     if (totskip == 0) {
 4820     pprintf(prn, "Dataset is already complete for %d-day calendar", pd);
 4821     return 0;
 4822     }
 4823 
 4824     bigset = create_new_dataset(dset->v, dset->n + totskip, NO_MARKERS);
 4825 
 4826     if (bigset == NULL) {
 4827     err = E_ALLOC;
 4828     } else {
 4829     int i, s = 0;
 4830 
 4831     edbak = ed0;
 4832 
 4833     for (t=0; t<dset->n; t++) {
 4834         if (t > 0) {
 4835         ntodate(datestr, t, dset);
 4836         wd = weekday_from_date(datestr);
 4837         ed = get_epoch_day(datestr);
 4838         s += 1 + effective_daily_skip(ed - edbak, wd, pd);
 4839         edbak = ed;
 4840         }
 4841         for (i=1; i<dset->v; i++) {
 4842         bigset->Z[i][s] = dset->Z[i][t];
 4843         }
 4844     }
 4845 
 4846     bigset->varname = dset->varname;
 4847     bigset->varinfo = dset->varinfo;
 4848     bigset->descrip = dset->descrip;
 4849 
 4850     bigset->pd = pd;
 4851     bigset->structure = TIME_SERIES;
 4852     bigset->sd0 = (double) ed0;
 4853     strcpy(bigset->stobs, dset->stobs);
 4854     ntodate(bigset->endobs, bigset->n - 1, bigset);
 4855 
 4856     dset->varname = NULL;
 4857     dset->varinfo = NULL;
 4858     dset->descrip = NULL;
 4859     dataset_destroy_obs_markers(dset);
 4860     free_Z(dset);
 4861     clear_datainfo(dset, CLEAR_SUBSAMPLE);
 4862 
 4863     *dset = *bigset;
 4864     }
 4865 
 4866     return err;
 4867 }
 4868 
 4869 /* MIDAS-related functions */
 4870 
 4871 /* postprocess: fill missing slots in daily data array
 4872    with the period (month or quarter) average
 4873    (FIXME support interpolation as an option?)
 4874 */
 4875 
 4876 int postprocess_daily_data (DATASET *dset, const int *list)
 4877 {
 4878     double *x, xbar, xsum;
 4879     int t, i, n_ok, n_miss;
 4880     int err = 0;
 4881 
 4882     for (t=dset->t1; t<=dset->t2; t++) {
 4883     xsum = 0.0;
 4884     n_ok = n_miss = 0;
 4885     for (i=1; i<=list[0]; i++) {
 4886         x = dset->Z[list[i]];
 4887         if (na(x[t])) {
 4888         n_miss++;
 4889         } else {
 4890         xsum += x[t];
 4891         n_ok++;
 4892         }
 4893     }
 4894     if (n_miss > 0 && n_ok > 0) {
 4895         xbar = xsum / n_ok;
 4896         for (i=1; i<=list[0]; i++) {
 4897         x = dset->Z[list[i]];
 4898         if (na(x[t])) {
 4899             x[t] = xbar;
 4900         }
 4901         }
 4902     }
 4903     }
 4904 
 4905     return err;
 4906 }
 4907 
 4908 int series_get_midas_period (const DATASET *dset, int i)
 4909 {
 4910     if (i > 0 && i < dset->v) {
 4911     return dset->varinfo[i]->midas_period;
 4912     }
 4913 
 4914     return 0;
 4915 }
 4916 
 4917 void series_set_midas_period (const DATASET *dset, int i,
 4918                   int period)
 4919 {
 4920     if (i > 0 && i < dset->v) {
 4921     dset->varinfo[i]->midas_period = period;
 4922     }
 4923 }
 4924 
 4925 int series_get_midas_freq (const DATASET *dset, int i)
 4926 {
 4927     if (i > 0 && i < dset->v) {
 4928     return dset->varinfo[i]->midas_freq;
 4929     }
 4930 
 4931     return 0;
 4932 }
 4933 
 4934 int series_set_midas_freq (const DATASET *dset, int i,
 4935                int freq)
 4936 {
 4937     int err = 0;
 4938 
 4939     if (i > 0 && i < dset->v) {
 4940     if (freq < 5 || freq > 12) {
 4941         err = E_DATA;
 4942     } else {
 4943         dset->varinfo[i]->midas_freq = freq;
 4944     }
 4945     } else {
 4946     err = E_DATA;
 4947     }
 4948 
 4949     return err;
 4950 }
 4951 
 4952 int series_is_midas_anchor (const DATASET *dset, int i)
 4953 {
 4954     if (i > 0 && i < dset->v &&
 4955     (dset->varinfo[i]->flags & VAR_HFANCHOR)) {
 4956     return dset->varinfo[i]->midas_period;
 4957     }
 4958 
 4959     return 0;
 4960 }
 4961 
 4962 void series_set_midas_anchor (const DATASET *dset, int i)
 4963 {
 4964     if (i > 0 && i < dset->v) {
 4965     dset->varinfo[i]->flags |= VAR_HFANCHOR;
 4966     }
 4967 }
 4968 
 4969 /* end MIDAS-related functions */
 4970 
 4971 void *series_info_bundle (const DATASET *dset, int i,
 4972               int *err)
 4973 {
 4974     gretl_bundle *b = NULL;
 4975 
 4976     if (dset != NULL && i >= 0 && i < dset->v) {
 4977     b = gretl_bundle_new();
 4978     if (b == NULL) {
 4979         *err = E_ALLOC;
 4980     }
 4981     } else {
 4982     *err = E_DATA;
 4983     }
 4984 
 4985     if (b != NULL) {
 4986     VARINFO *vinfo = dset->varinfo[i];
 4987 
 4988     gretl_bundle_set_string(b, "name", dset->varname[i]);
 4989     if (vinfo->label != NULL) {
 4990         gretl_bundle_set_string(b, "description", vinfo->label);
 4991     }
 4992     if (vinfo->display_name[0] != '\0') {
 4993         gretl_bundle_set_string(b, "graph_name", vinfo->display_name);
 4994     }
 4995     gretl_bundle_set_int(b, "discrete", vinfo->flags & VAR_DISCRETE ?
 4996                  1 : 0);
 4997     gretl_bundle_set_int(b, "coded", vinfo->flags & VAR_CODED ?
 4998                  1 : 0);
 4999     gretl_bundle_set_string(b, "parent", vinfo->parent);
 5000     if (vinfo->transform > 0) {
 5001         gretl_bundle_set_string(b, "transform",
 5002                     gretl_command_word(vinfo->transform));
 5003     } else {
 5004         gretl_bundle_set_string(b, "transform", "none");
 5005     }
 5006     gretl_bundle_set_int(b, "lag", vinfo->lag);
 5007     gretl_bundle_set_int(b, "has_string_table", vinfo->st != NULL);
 5008     if (vinfo->midas_period > 0) {
 5009         gretl_bundle_set_int(b, "midas_period", vinfo->midas_period);
 5010     }
 5011     if (vinfo->midas_freq > 0) {
 5012         gretl_bundle_set_int(b, "midas_freq", vinfo->midas_freq);
 5013     }
 5014     }
 5015 
 5016     return b;
 5017 }
 5018 
 5019 /* Given a series label @s, see if it can be recognized
 5020    as identifying the series as the product of two others,
 5021    and if so write the names of the others into @targ1
 5022    and @targ2.
 5023 */
 5024 
 5025 static int get_interaction_names (const char *s,
 5026                   char *targ1,
 5027                   char *targ2)
 5028 {
 5029     const char *p;
 5030     int n1, n2, ret = 0;
 5031 
 5032     *targ1 = *targ2 = '\0';
 5033 
 5034     p = strchr(s, '*');
 5035     if (p == NULL || strchr(p+1, '*') != NULL) {
 5036     /* the label string does not contain a single '*' */
 5037     return 0;
 5038     }
 5039 
 5040     s += strspn(s, " ");
 5041     n1 = gretl_namechar_spn(s);
 5042     p++;
 5043     p += strspn(p, " ");
 5044     n2 = gretl_namechar_spn(p);
 5045 
 5046     if (n1 > 0 && n1 < VNAMELEN &&
 5047     n2 > 0 && n2 < VNAMELEN) {
 5048     strncat(targ1, s, n1);
 5049     strncat(targ2, p, n2);
 5050     ret = 1;
 5051     }
 5052 
 5053     return ret;
 5054 }
 5055 
 5056 /* Given a series label @s, see if it can be recognized as
 5057    identifying the series as the square of another, and if
 5058    so write the name of the other into @targ.
 5059 */
 5060 
 5061 static int get_square_parent_name (const char *s, char *targ,
 5062                    char *targ2)
 5063 {
 5064     const char *p;
 5065     int n1, n2, ret = 0;
 5066 
 5067     *targ = '\0';
 5068 
 5069     if (*s == '=' && (p = strstr(s, "squared")) != NULL) {
 5070     /* "= PARENT squared" */
 5071     s++;
 5072     s += strspn(s, " ");
 5073     n1 = gretl_namechar_spn(s);
 5074     n2 = p - s - 1;
 5075     if (n1 > 0 && n1 < VNAMELEN && n2 == n1) {
 5076         strncat(targ, s, n1);
 5077         ret = 1;
 5078     }
 5079     } else if (strchr(s, '^') != NULL) {
 5080     /* "PARENT^2" */
 5081     n1 = gretl_namechar_spn(s);
 5082     if (n1 > 0 && n1 < VNAMELEN) {
 5083         p = s + n1;
 5084         if (!strcmp(p, "^2")) {
 5085         strncat(targ, s, n1);
 5086         ret = 1;
 5087         }
 5088     }
 5089     } else if ((p = strstr(s, "square of ")) != NULL) {
 5090     p += 9;
 5091     p += strspn(p, " ");
 5092     n1 = gretl_namechar_spn(p);
 5093     if (n1 > 0 && n1 < VNAMELEN) {
 5094         strncat(targ, p, n1);
 5095         ret = 1;
 5096     }
 5097     } else if (get_interaction_names(s, targ, targ2)) {
 5098     /* "x * x" ? */
 5099     if (!strcmp(targ, targ2)) {
 5100         ret = 1;
 5101     }
 5102     }
 5103 
 5104     return ret;
 5105 }
 5106 
 5107 /* Given either (a) two series identified by ID numbers
 5108    i, j where the second is supposed to be the square
 5109    of the first, or (b) three series i, j, k where the
 5110    third is supposed to be the product of the first two,
 5111    check that the putative relationship actually holds
 5112    over the current sample range. Return 1 if so, else 0.
 5113 */
 5114 
 5115 static int validate_relationship (int i, int j, int k,
 5116                   const DATASET *dset)
 5117 {
 5118     double xi, xj;
 5119     int t;
 5120 
 5121     for (t=dset->t1; t<=dset->t2; t++) {
 5122     xi = dset->Z[i][t];
 5123     xj = dset->Z[j][t];
 5124     if (k > 0) {
 5125         /* interaction test: xk = xi*xj */
 5126         if (!na(xi) && !na(xj) && dset->Z[k][t] != xi*xj) {
 5127         return 0;
 5128         }
 5129     } else {
 5130         /* square test: xj = xi*xi */
 5131         if (!na(xi) && xj != xi*xi) {
 5132         return 0;
 5133         }
 5134     }
 5135     }
 5136 
 5137     return 1;
 5138 }
 5139 
 5140 /* In case we find more interaction terms that can be fitted into
 5141    the current column-size of the "list info" matrix, add two more
 5142    (since the encoding of each interaction for a given "primary"
 5143    series requires two columns).
 5144 */
 5145 
 5146 static int resize_listinfo_matrix (gretl_matrix *m)
 5147 {
 5148     int newc = m->cols + 2;
 5149     int i, err = 0;
 5150 
 5151     err = gretl_matrix_realloc(m, m->rows, newc);
 5152     if (!err) {
 5153     for (i=0; i<m->rows; i++) {
 5154         gretl_matrix_set(m, i, newc-2, 0);
 5155         gretl_matrix_set(m, i, newc-1, 0);
 5156     }
 5157     }
 5158 
 5159     return err;
 5160 }
 5161 
 5162 static int get_iact_column (gretl_matrix *m, int i, int *err)
 5163 {
 5164     int j;
 5165 
 5166     for (j=3; j<m->cols; j+=2) {
 5167     if (gretl_matrix_get(m, i, j) == 0) {
 5168         return j;
 5169     }
 5170     }
 5171 
 5172     /* looks like we need more columns */
 5173     *err = resize_listinfo_matrix(m);
 5174     return *err ? -1 : m->cols - 2;
 5175 }
 5176 
 5177 /* The (optionally) "condensed" version of the listinfo_matrix
 5178    includes only primary terms (and excludes the constant).
 5179    The first column of the full matrix is replaced by the
 5180    position in @list of each primary term.
 5181 */
 5182 
 5183 static int condense_listinfo_matrix (gretl_matrix *m,
 5184                      const int *list,
 5185                      const DATASET *dset)
 5186 {
 5187     gretl_matrix *mc = NULL;
 5188     char **S = NULL;
 5189     double x;
 5190     int i, j, ic, n = 0;
 5191 
 5192     for (i=0; i<m->rows; i++) {
 5193     if (m->val[i] == 1) {
 5194         n++;
 5195     }
 5196     }
 5197 
 5198     if (n == m->rows) {
 5199     /* nothing to be done */
 5200     return 0;
 5201     }
 5202 
 5203     mc = gretl_matrix_alloc(n, m->cols);
 5204     if (mc == NULL) {
 5205     return E_ALLOC;
 5206     }
 5207 
 5208     S = strings_array_new(n);
 5209 
 5210     ic = 0;
 5211     for (i=0; i<m->rows; i++) {
 5212     if (m->val[i] == 1) {
 5213         gretl_matrix_set(mc, ic, 0, i+1);
 5214         for (j=1; j<m->cols; j++) {
 5215         x = gretl_matrix_get(m, i, j);
 5216         gretl_matrix_set(mc, ic, j, x);
 5217         }
 5218         S[ic] = gretl_strdup(dset->varname[list[i+1]]);
 5219         ic++;
 5220     }
 5221     }
 5222 
 5223     gretl_matrix_reuse(m, n, m->cols);
 5224     gretl_matrix_copy_values(m, mc);
 5225     gretl_matrix_free(mc);
 5226     gretl_matrix_set_rownames(m, S);
 5227 
 5228     return 0;
 5229 }
 5230 
 5231 static gretl_matrix *
 5232 linfo_matrix_via_labels (const int *list,
 5233              const DATASET *dset,
 5234              gretlopt opt,
 5235              int *err)
 5236 {
 5237     gretl_matrix *ret = NULL;
 5238     const char *label;
 5239     char targ1[VNAMELEN];
 5240     char targ2[VNAMELEN];
 5241     int i, vi, j, vj;
 5242     int pcol = 0, dcol = 1;
 5243     int iacol, sqcol = 2;
 5244     int n;
 5245 
 5246     if (list == NULL || list[0] == 0) {
 5247     *err = E_DATA;
 5248     return ret;
 5249     }
 5250 
 5251     n = list[0];
 5252     ret = gretl_zero_matrix_new(n, 5);
 5253     if (ret == NULL) {
 5254     *err = E_ALLOC;
 5255     return ret;
 5256     }
 5257 
 5258     for (i=1; i<=n && !*err; i++) {
 5259     /* default to series is primary */
 5260     gretl_matrix_set(ret, i-1, pcol, 1);
 5261     vi = list[i];
 5262     if (vi == 0) {
 5263         /* mark as non-primary and move on */
 5264         gretl_matrix_set(ret, i-1, pcol, 0);
 5265         continue;
 5266     }
 5267     if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
 5268         /* insert dummy flag in this row */
 5269         gretl_matrix_set(ret, i-1, dcol, 1);
 5270     }
 5271     label = series_get_label(dset, vi);
 5272     if (label == NULL) {
 5273         continue;
 5274     }
 5275     if (get_square_parent_name(label, targ1, targ2)) {
 5276         /* looks like this could be a squared term */
 5277         for (j=1; j<=n; j++) {
 5278         if (j == i) continue;
 5279         vj = list[j];
 5280         if (!strcmp(targ1, dset->varname[vj]) &&
 5281             validate_relationship(vj, vi, 0, dset)) {
 5282             /* mark this series as non-primary, and as square */
 5283             gretl_matrix_set(ret, i-1, pcol, 0);
 5284             gretl_matrix_set(ret, i-1, sqcol, j);
 5285             /* insert square ref in parent's row */
 5286             gretl_matrix_set(ret, j-1, sqcol, i);
 5287             break;
 5288         }
 5289         }
 5290         continue;
 5291     }
 5292     if (get_interaction_names(label, targ1, targ2)) {
 5293         /* looks like this could be an interaction term */
 5294         int ia1 = 0, ia2 = 0;
 5295 
 5296         for (j=1; j<=n; j++) {
 5297         if (j == i) continue;
 5298         vj = list[j];
 5299         if (!strcmp(targ1, dset->varname[vj])) {
 5300             ia1 = j;
 5301         } else if (!strcmp(targ2, dset->varname[vj])) {
 5302             ia2 = j;
 5303         }
 5304         }
 5305         if (ia1 > 0 && ia2 > 0 &&
 5306         validate_relationship(list[ia1], list[ia2], vi, dset)) {
 5307         /* mark this series as non-primary, interaction */
 5308         gretl_matrix_set(ret, i-1, pcol, 0);
 5309         gretl_matrix_set(ret, i-1, 3, ia1);
 5310         gretl_matrix_set(ret, i-1, 4, ia2);
 5311         /* we may need to expand the number of columns */
 5312         iacol = get_iact_column(ret, i, err);
 5313         if (!*err) {
 5314             /* insert cross references in parents' rows */
 5315             gretl_matrix_set(ret, ia1-1, iacol, ia2);
 5316             gretl_matrix_set(ret, ia1-1, iacol+1, i);
 5317             gretl_matrix_set(ret, ia2-1, iacol, ia1);
 5318             gretl_matrix_set(ret, ia2-1, iacol+1, i);
 5319         }
 5320         }
 5321     }
 5322     }
 5323 
 5324     if (*err) {
 5325     gretl_matrix_free(ret);
 5326     ret = NULL;
 5327     } else if (opt & OPT_C) {
 5328     condense_listinfo_matrix(ret, list, dset);
 5329     } else {
 5330     /* convenience: attach series names to rows */
 5331     char **S;
 5332     int serr = 0;
 5333 
 5334     S = gretl_list_get_names_array(list, dset, &serr);
 5335     if (S != NULL) {
 5336         gretl_matrix_set_rownames(ret, S);
 5337     }
 5338     }
 5339 
 5340     return ret;
 5341 }
 5342 
 5343 static gretl_matrix *
 5344 linfo_matrix_via_data (const int *list,
 5345                const DATASET *dset,
 5346                gretlopt opt,
 5347                int *err)
 5348 {
 5349     gretl_matrix *ret = NULL;
 5350     int i, vi, j, vj, k, vk;
 5351     int pcol = 0, dcol = 1;
 5352     int iacol, sqcol = 2;
 5353     int n;
 5354 
 5355     if (list == NULL || list[0] == 0) {
 5356     *err = E_DATA;
 5357     return ret;
 5358     }
 5359 
 5360     n = list[0];
 5361     ret = gretl_zero_matrix_new(n, 5);
 5362     if (ret == NULL) {
 5363     *err = E_ALLOC;
 5364     return ret;
 5365     }
 5366 
 5367     for (i=1; i<=n && !*err; i++) {
 5368     int matched = 0;
 5369 
 5370     /* default to series is primary */
 5371     gretl_matrix_set(ret, i-1, pcol, 1);
 5372     vi = list[i];
 5373     if (vi == 0) {
 5374         /* mark as non-primary and move on */
 5375         gretl_matrix_set(ret, i-1, pcol, 0);
 5376         continue;
 5377     }
 5378     if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
 5379         /* insert dummy flag in this row */
 5380         gretl_matrix_set(ret, i-1, dcol, 1);
 5381     }
 5382     for (j=1; j<=n && !matched; j++) {
 5383         vj = list[j];
 5384         if (j == i || vj == 0) continue;
 5385         if (validate_relationship(vj, vi, 0, dset)) {
 5386         /* mark this series as non-primary, square */
 5387         gretl_matrix_set(ret, i-1, pcol, 0);
 5388         gretl_matrix_set(ret, i-1, sqcol, j);
 5389         /* insert square ref in parent's row */
 5390         gretl_matrix_set(ret, j-1, sqcol, i);
 5391         matched = 1;
 5392         }
 5393         for (k=1; k<=n && !matched; k++) {
 5394         vk = list[k];
 5395         if (k == i || k == j || vk == 0) continue;
 5396         if (validate_relationship(vj, vk, vi, dset)) {
 5397             /* mark this series as non-primary, interaction */
 5398             gretl_matrix_set(ret, i-1, pcol, 0);
 5399             gretl_matrix_set(ret, i-1, 3, j);
 5400             gretl_matrix_set(ret, i-1, 4, k);
 5401             /* we may need to expand the number of columns */
 5402             iacol = get_iact_column(ret, i, err);
 5403             if (!*err) {
 5404             /* insert cross references in parents' rows */
 5405             gretl_matrix_set(ret, j-1, iacol, k);
 5406             gretl_matrix_set(ret, j-1, iacol+1, i);
 5407             gretl_matrix_set(ret, k-1, iacol, j);
 5408             gretl_matrix_set(ret, k-1, iacol+1, i);
 5409             }
 5410             matched = 1;
 5411         }
 5412         }
 5413     }
 5414     }
 5415 
 5416     if (*err) {
 5417     gretl_matrix_free(ret);
 5418     ret = NULL;
 5419     } else if (opt & OPT_C) {
 5420     condense_listinfo_matrix(ret, list, dset);
 5421     } else {
 5422     /* convenience: attach series names to rows */
 5423     char **S;
 5424     int serr = 0;
 5425 
 5426     S = gretl_list_get_names_array(list, dset, &serr);
 5427     if (S != NULL) {
 5428         gretl_matrix_set_rownames(ret, S);
 5429     }
 5430     }
 5431 
 5432     return ret;
 5433 }
 5434 
 5435 /* Construct a matrix providing information about the relations
 5436    between the series in @list. This will have rows equal to the
 5437    number of series and at least 5 columns (shown as 1-based here).
 5438    All elements of the matrix are zero unless other