"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020e/lib/src/dataset.c" (19 Nov 2020, 121683 Bytes) of package /linux/misc/gretl-2020e.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "dataset.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2020d_vs_2020e.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 #include "libgretl.h"
   21 #include "gretl_func.h"
   22 #include "uservar.h"
   23 #include "gretl_string_table.h"
   24 #include "libset.h"
   25 #include "dbread.h"
   26 
   27 #define DDEBUG 0
   28 #define FULLDEBUG 0
   29 
   30 #define Z_COLS_BORROWED 2
   31 
   32 #define dset_zcols_borrowed(d) (d->auxiliary == Z_COLS_BORROWED)
   33 
   34 struct VARINFO_ {
   35     char *label;
   36     char display_name[MAXDISP];
   37     char parent[VNAMELEN];
   38     VarFlags flags;
   39     char compact_method;
   40     gint64 mtime;
   41     short transform;    /* note: command index of transform */
   42     short lag;
   43     short stack_level;
   44     short midas_period;
   45     char midas_freq;
   46     short orig_pd;
   47     series_table *st;
   48 };
   49 
   50 static int pad_daily_data (DATASET *dset, int pd, PRN *prn);
   51 
   52 /**
   53  * check_dataset_is_changed:
   54  * @dset: dataset to check.
   55  *
   56  * Returns: 1 if @dset has been modified since
   57  * the last call to this function.
   58  */
   59 
   60 int check_dataset_is_changed (DATASET *dset)
   61 {
   62     int ret = dset->modflag;
   63 
   64     dset->modflag = 0;
   65     return ret;
   66 }
   67 
   68 /**
   69  * set_dataset_is_changed:
   70  * @dset: dataset.
   71  * @s: 1 or 0.
   72  *
   73  * Sets the internal boolean "changed" flag to @s.
   74  */
   75 
   76 void set_dataset_is_changed (DATASET *dset, int s)
   77 {
   78     if (dset != NULL && gretl_function_depth() == 0) {
   79     dset->modflag = s;
   80     }
   81 }
   82 
   83 static void dataset_set_nobs (DATASET *dset, int n)
   84 {
   85     if (n != dset->n) {
   86     /* if the total number of observations in the dataset
   87        has changed, the current "matrix_mask", if present
   88        (see libset.c), will now be invalid
   89     */
   90     destroy_matrix_mask();
   91     dset->n = n;
   92     }
   93 }
   94 
   95 /**
   96  * free_Z:
   97  * @dset: dataset information.
   98  *
   99  * Does a deep free on the data matrix.
  100  */
  101 
  102 void free_Z (DATASET *dset)
  103 {
  104     if (dset != NULL && dset->Z != NULL) {
  105     int i, v = dset_zcols_borrowed(dset) ? 1 : dset->v;
  106 
  107 #if DDEBUG
  108     fprintf(stderr, "Freeing Z (%p): %d vars\n", (void *) dset->Z, v);
  109 #endif
  110     for (i=0; i<v; i++) {
  111         free(dset->Z[i]);
  112     }
  113     free(dset->Z);
  114     dset->Z = NULL;
  115     }
  116 }
  117 
  118 /**
  119  * dataset_destroy_obs_markers:
  120  * @dset: data information struct.
  121  *
  122  * Frees any allocated observation markers for @dset.
  123  */
  124 
  125 void dataset_destroy_obs_markers (DATASET *dset)
  126 {
  127     int i;
  128 
  129     if (dset->S != NULL) {
  130     for (i=0; i<dset->n; i++) {
  131        free(dset->S[i]);
  132     }
  133     free(dset->S);
  134     dset->S = NULL;
  135     dset->markers = NO_MARKERS;
  136     }
  137 }
  138 
  139 static void free_varinfo (DATASET *dset, int v)
  140 {
  141     if (dset->varinfo[v]->st != NULL) {
  142     series_table_destroy(dset->varinfo[v]->st);
  143     }
  144     if (dset->varinfo[v]->label != NULL) {
  145     free(dset->varinfo[v]->label);
  146     }
  147     free(dset->varinfo[v]);
  148 }
  149 
  150 /**
  151  * clear_datainfo:
  152  * @dset: data information struct.
  153  * @code: either %CLEAR_FULL or %CLEAR_SUBSAMPLE.
  154  *
  155  * Frees the allocated content of a data information struct;
  156  * note that @dset itself is not freed.
  157  */
  158 
  159 void clear_datainfo (DATASET *dset, int code)
  160 {
  161     int i;
  162 
  163     if (dset == NULL) return;
  164 
  165     if (dset->S != NULL) {
  166     dataset_destroy_obs_markers(dset);
  167     }
  168     if (dset->submask != NULL) {
  169     free_subsample_mask(dset->submask);
  170     dset->submask = NULL;
  171     }
  172     if (dset->restriction != NULL) {
  173     free(dset->restriction);
  174     dset->restriction = NULL;
  175     }
  176     if (dset->padmask != NULL) {
  177     free(dset->padmask);
  178     dset->padmask = NULL;
  179     }
  180     if (dset->pangrps != NULL) {
  181     free(dset->pangrps);
  182     dset->pangrps = NULL;
  183     }
  184 
  185     /* if this is not a sub-sample datainfo, free varnames, labels, etc. */
  186 
  187     if (code == CLEAR_FULL) {
  188     if (dset->varname != NULL) {
  189         for (i=0; i<dset->v; i++) {
  190         free(dset->varname[i]);
  191         }
  192         free(dset->varname);
  193         dset->varname = NULL;
  194     }
  195     if (dset->varinfo != NULL) {
  196         for (i=0; i<dset->v; i++) {
  197         free_varinfo(dset, i);
  198         }
  199         free(dset->varinfo);
  200         dset->varinfo = NULL;
  201     }
  202     if (dset->descrip != NULL) {
  203         free(dset->descrip);
  204         dset->descrip = NULL;
  205     }
  206     if (dset->mapfile != NULL) {
  207         free(dset->mapfile);
  208         dset->mapfile = NULL;
  209     }
  210 
  211     maybe_free_full_dataset(dset);
  212 
  213     dset->v = dset->n = 0;
  214     }
  215 }
  216 
  217 /**
  218  * destroy_dataset:
  219  * @dset: pointer to dataset.
  220  *
  221  * Frees all resources associated with @dset.
  222  */
  223 
  224 void destroy_dataset (DATASET *dset)
  225 {
  226     if (dset != NULL) {
  227     free_Z(dset);
  228     clear_datainfo(dset, CLEAR_FULL);
  229     free(dset);
  230     }
  231 }
  232 
  233 /**
  234  * copy_dataset_obs_info:
  235  * @targ: pointer to target dataset.
  236  * @src: pointer to source dataset.
  237  *
  238  * Sets the "date" or observations information in @targ to that
  239  * found in @src.
  240  */
  241 
  242 void copy_dataset_obs_info (DATASET *targ, const DATASET *src)
  243 {
  244     strcpy(targ->stobs, src->stobs);
  245     strcpy(targ->endobs, src->endobs);
  246     targ->sd0 = src->sd0;
  247     targ->pd = src->pd;
  248     targ->structure = src->structure;
  249 }
  250 
  251 /**
  252  * dataset_obs_info_default:
  253  * @dset: pointer to dataset.
  254  *
  255  * Sets the "date" or observations information in @dset to a
  256  * simple default of cross-sectional data, observations 1 to n,
  257  * where n is the %n element (number of observations) in @dset.
  258  */
  259 
  260 void dataset_obs_info_default (DATASET *dset)
  261 {
  262     strcpy(dset->stobs, "1");
  263     sprintf(dset->endobs, "%d", dset->n);
  264     dset->sd0 = 1.0;
  265     dset->pd = 1;
  266     dset->structure = CROSS_SECTION;
  267 }
  268 
  269 /**
  270  * dataset_allocate_obs_markers:
  271  * @dset: pointer to dataset
  272  *
  273  * Allocates space in @dset for strings indentifying the
  274  * observations and initializes all of the markers to empty
  275  * strings.  Note that These strings have a fixed maximum
  276  * length of #OBSLEN - 1.
  277  *
  278  * Returns: 0 on success, E_ALLOC on error.
  279  */
  280 
  281 int dataset_allocate_obs_markers (DATASET *dset)
  282 {
  283     char **S = NULL;
  284     int err = 0;
  285 
  286     if (dset->S == NULL) {
  287     /* not already allocated */
  288     S = strings_array_new_with_length(dset->n, OBSLEN);
  289     if (S == NULL) {
  290         err = E_ALLOC;
  291     } else {
  292         dset->S = S;
  293     }
  294     }
  295 
  296     if (dset->S != NULL) {
  297     dset->markers = REGULAR_MARKERS;
  298     }
  299 
  300     return err;
  301 }
  302 
  303 static void gretl_varinfo_init (VARINFO *vinfo)
  304 {
  305     vinfo->label = NULL;
  306     vinfo->display_name[0] = '\0';
  307     vinfo->parent[0] = '\0';
  308     vinfo->flags = 0;
  309     vinfo->transform = 0;
  310     vinfo->lag = 0;
  311     vinfo->midas_period = 0;
  312     vinfo->midas_freq = 0;
  313     vinfo->orig_pd = 0;
  314     vinfo->compact_method = COMPACT_NONE;
  315     vinfo->mtime = 0;
  316     vinfo->stack_level = gretl_function_depth();
  317     vinfo->st = NULL;
  318 }
  319 
  320 static void copy_label (char **targ, const char *src)
  321 {
  322     free(*targ);
  323     if (src == NULL) {
  324     *targ = NULL;
  325     } else {
  326     *targ = gretl_strdup(src);
  327     }
  328 }
  329 
  330 static int labels_differ (const char *s1, const char *s2)
  331 {
  332     if ((s1 == NULL && s2 != NULL) || (s1 != NULL && s2 == NULL)) {
  333     return 1;
  334     } else if (s1 != NULL && s2 != NULL) {
  335     return strcmp(s1, s2) != 0;
  336     } else {
  337     return 0;
  338     }
  339 }
  340 
  341 /**
  342  * copy_varinfo:
  343  * @targ: target to which to copy.
  344  * @src: source to copy from.
  345  *
  346  * Copies all relevant information from @src to @targ.
  347  */
  348 
  349 void copy_varinfo (VARINFO *targ, const VARINFO *src)
  350 {
  351     if (src == NULL || targ == NULL) {
  352     return;
  353     }
  354     copy_label(&targ->label, src->label);
  355     strcpy(targ->display_name, src->display_name);
  356     strcpy(targ->parent, src->parent);
  357     targ->flags = src->flags;
  358     targ->transform = src->transform;
  359     targ->lag = src->lag;
  360     targ->midas_period = src->midas_period;
  361     targ->midas_freq = src->midas_freq;
  362     targ->orig_pd = src->orig_pd;
  363     targ->compact_method = src->compact_method;
  364     targ->stack_level = src->stack_level;
  365     if (src->st != NULL) {
  366     targ->st = series_table_copy(src->st);
  367     }
  368 }
  369 
  370 /* For use in the context of returning from a sub-sampled
  371    dataset to the full one: trim off series names and
  372    "varinfo" beyond the index @nv, which gives the number
  373    of series in the full dataset.
  374 */
  375 
  376 int shrink_varinfo (DATASET *dset, int nv)
  377 {
  378     char **vnames;
  379     VARINFO **vi;
  380     int i, err = 0;
  381 
  382     if (nv > dset->v) {
  383     return E_DATA;
  384     } else if (nv == dset->v) {
  385     return 0;
  386     }
  387 
  388     for (i=nv; i<dset->v; i++) {
  389     free(dset->varname[i]);
  390     free(dset->varinfo[i]);
  391     }
  392 
  393     vnames = realloc(dset->varname, nv * sizeof *vnames);
  394     if (vnames == NULL) {
  395     err = E_ALLOC;
  396     } else {
  397     dset->varname = vnames;
  398     }
  399 
  400     vi = realloc(dset->varinfo, nv * sizeof *vi);
  401     if (vi == NULL) {
  402     err = E_ALLOC;
  403     } else {
  404     dset->varinfo = vi;
  405     }
  406 
  407     return err;
  408 }
  409 
  410 /**
  411  * dataset_allocate_varnames:
  412  * @dset: pointer to dataset.
  413  *
  414  * Given a blank @dset, which should have been obtained using
  415  * datainfo_new(), allocate space for the names of variables.
  416  * The @v member of @dset (representing the number of variables,
  417  * including the automatically added constant at position 0) must be
  418  * set before calling this function.
  419  *
  420  * Returns: 0 on sucess, E_ALLOC on failure.
  421  */
  422 
  423 int dataset_allocate_varnames (DATASET *dset)
  424 {
  425     int i, j, v = dset->v;
  426     int err = 0;
  427 
  428     dset->varname = strings_array_new_with_length(v, VNAMELEN);
  429     if (dset->varname == NULL) {
  430     return E_ALLOC;
  431     }
  432 
  433     dset->varinfo = malloc(v * sizeof *dset->varinfo);
  434     if (dset->varinfo == NULL) {
  435     free(dset->varname);
  436     return E_ALLOC;
  437     }
  438 
  439     for (i=0; i<v; i++) {
  440     dset->varinfo[i] = malloc(sizeof **dset->varinfo);
  441     if (dset->varinfo[i] == NULL) {
  442         for (j=0; j<i; j++) {
  443         free(dset->varinfo[j]);
  444         }
  445         free(dset->varinfo);
  446         dset->varinfo = NULL;
  447         err = E_ALLOC;
  448         break;
  449     } else {
  450         gretl_varinfo_init(dset->varinfo[i]);
  451     }
  452     }
  453 
  454     if (!err) {
  455     strcpy(dset->varname[0], "const");
  456     series_set_label(dset, 0, _("auto-generated constant"));
  457     }
  458 
  459     return err;
  460 }
  461 
  462 /**
  463  * datainfo_init:
  464  * @dset: pointer to DATASET struct.
  465  *
  466  * Zeros all members of @dset and sets it as a plain cross-section.
  467  * Designed for use with a DATASET structure that has not been
  468  * obtained via datainfo_new().
  469  */
  470 
  471 void datainfo_init (DATASET *dset)
  472 {
  473     dset->v = 0;
  474     dset->n = 0;
  475     dset->pd = 1;
  476     dset->structure = CROSS_SECTION;
  477     dset->sd0 = 1.0;
  478     dset->t1 = 0;
  479     dset->t2 = 0;
  480     dset->stobs[0] = '\0';
  481     dset->endobs[0] = '\0';
  482 
  483     dset->Z = NULL;
  484     dset->varname = NULL;
  485     dset->varinfo = NULL;
  486 
  487     dset->markers = NO_MARKERS;
  488     dset->modflag = 0;
  489 
  490     dset->S = NULL;
  491     dset->descrip = NULL;
  492     dset->submask = NULL;
  493     dset->restriction = NULL;
  494     dset->padmask = NULL;
  495     dset->mapfile = NULL;
  496     dset->pangrps = NULL;
  497     dset->panel_pd = 0;
  498     dset->panel_sd0 = 0;
  499 
  500     dset->auxiliary = 0;
  501     dset->rseed = 0;
  502 }
  503 
  504 /**
  505  * datainfo_new:
  506  *
  507  * Creates a new data information struct pointer from scratch,
  508  * properly initialized as empty (no variables, no observations).
  509  *
  510  * Returns: pointer to data information struct, or NULL on error.
  511  */
  512 
  513 DATASET *datainfo_new (void)
  514 {
  515     DATASET *dset = malloc(sizeof *dset);
  516 
  517     if (dset != NULL) {
  518     datainfo_init(dset);
  519     }
  520 
  521     return dset;
  522 }
  523 
  524 static DATASET *real_create_new_dataset (int nvar, int nobs,
  525                      gretlopt opt)
  526 {
  527     DATASET *dset = datainfo_new();
  528 
  529     if (dset == NULL) return NULL;
  530 
  531     dset->v = nvar;
  532     dset->n = nobs;
  533     dset->Z = NULL;
  534 
  535     if (start_new_Z(dset, opt)) {
  536     free(dset);
  537     return NULL;
  538     }
  539 
  540     if (opt & OPT_M) {
  541     if (dataset_allocate_obs_markers(dset)) {
  542         free_datainfo(dset);
  543         return NULL;
  544     }
  545     }
  546 
  547     dataset_obs_info_default(dset);
  548 
  549     return dset;
  550 }
  551 
  552 /**
  553  * create_new_dataset:
  554  * @nvar: number of variables.
  555  * @nobs: number of observations per variable.
  556  * @markers: 1 if space should be allocated for "case markers" for
  557  * the observations, 0 otherwise.
  558  *
  559  * Allocates space in the dataset to hold the specified number
  560  * of variables and observations.
  561  *
  562  * Returns: pointer to dataset struct, or NULL on error.
  563  */
  564 
  565 DATASET *create_new_dataset (int nvar, int nobs, int markers)
  566 {
  567     gretlopt opt = markers ? OPT_M : OPT_NONE;
  568 
  569     return real_create_new_dataset(nvar, nobs, opt);
  570 }
  571 
  572 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt)
  573 {
  574     DATASET *dset = real_create_new_dataset(nvar, nobs, opt);
  575 
  576     if (dset != NULL) {
  577     if (opt & OPT_B) {
  578         dset->auxiliary = Z_COLS_BORROWED;
  579     } else {
  580         dset->auxiliary = 1;
  581     }
  582     }
  583 
  584     return dset;
  585 }
  586 
  587 static double **make_borrowed_Z (int v, int n)
  588 {
  589     double **Z = malloc(v * sizeof *Z);
  590 
  591     if (Z != NULL) {
  592     int i;
  593 
  594     for (i=0; i<v; i++) {
  595         Z[i] = NULL;
  596     }
  597 
  598     Z[0] = malloc(n * sizeof **Z);
  599 
  600     if (Z[0] == NULL) {
  601         free(Z);
  602         Z = NULL;
  603     } else {
  604         for (i=0; i<n; i++) {
  605         Z[0][i] = 1.0;
  606         }
  607     }
  608     }
  609 
  610     return Z;
  611 }
  612 
  613 /**
  614  * allocate_Z:
  615  * @dset: pointer to dataset.
  616  * @opt: may include OPT_B to indicate that the data columns
  617  * will be "borrowed".
  618  *
  619  * Allocates the two-dimensional data array Z,
  620  * based on the v (number of variables) and n (number of
  621  * observations) members of @dset.  The variable at
  622  * position 0 is initialized to all 1s; other variables
  623  * are initialized to #NADBL (unless OPT_B is given).
  624  *
  625  * Returns: 0 on success, E_ALLOC on error.
  626  */
  627 
  628 int allocate_Z (DATASET *dset, gretlopt opt)
  629 {
  630     int i, t;
  631     int err = 0;
  632 
  633     if (dset->Z != NULL) {
  634     fprintf(stderr, "*** error: allocate_Z called with non-NULL Z\n");
  635     }
  636 
  637     if (opt & OPT_B) {
  638     dset->Z = make_borrowed_Z(dset->v, dset->n);
  639     } else {
  640     dset->Z = doubles_array_new(dset->v, dset->n);
  641     }
  642 
  643     if (dset->Z == NULL) {
  644     err = E_ALLOC;
  645     } else if (!(opt & OPT_B)) {
  646     for (i=0; i<dset->v; i++) {
  647         for (t=0; t<dset->n; t++) {
  648         dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
  649         }
  650     }
  651     }
  652 
  653     return err;
  654 }
  655 
  656 /**
  657  * start_new_Z:
  658  * @dset: pointer to dataset.
  659  * @opt: if includes OPT_R we're sub-sampling from a full data set;
  660  * if includes OPT_P, do not null out dset->S and markers.
  661  *
  662  * Initializes the data array within @dset (adding the constant in
  663  * position 0).
  664  *
  665  * Returns: 0 on successful completion, non-zero on error.
  666  */
  667 
  668 int start_new_Z (DATASET *dset, gretlopt opt)
  669 {
  670     if (allocate_Z(dset, opt)) {
  671     return E_ALLOC;
  672     }
  673 
  674     dset->t1 = 0;
  675     dset->t2 = dset->n - 1;
  676 
  677     if (opt & OPT_R) {
  678     /* sub-sampling */
  679     dset->varname = NULL;
  680     dset->varinfo = NULL;
  681     } else if (dataset_allocate_varnames(dset)) {
  682     free_Z(dset);
  683     dset->Z = NULL;
  684     return E_ALLOC;
  685     }
  686 
  687     if (!(opt & OPT_P)) {
  688     dset->S = NULL;
  689     dset->markers = NO_MARKERS;
  690     }
  691 
  692     dset->descrip = NULL;
  693     dset->submask = NULL;
  694     dset->restriction = NULL;
  695     dset->padmask = NULL;
  696     dset->mapfile = NULL;
  697 
  698     if (!(opt & OPT_R)) {
  699     dset->pangrps = NULL;
  700     }
  701 
  702     return 0;
  703 }
  704 
  705 static int reallocate_markers (DATASET *dset, int n)
  706 {
  707     char **S;
  708     int t;
  709 
  710     S = realloc(dset->S, n * sizeof *S);
  711     if (S == NULL) {
  712     return 1;
  713     }
  714 
  715     for (t=dset->n; t<n; t++) {
  716     S[t] = malloc(OBSLEN);
  717     if (S[t] == NULL) {
  718         int j;
  719 
  720         for (j=dset->n; j<t; j++) {
  721         free(S[j]);
  722         }
  723         free(S);
  724         return 1;
  725     }
  726     S[t][0] = '\0';
  727     }
  728 
  729     dset->S = S;
  730 
  731     return 0;
  732 }
  733 
  734 /* Allow for the possibility of centered seasonal dummies: usually
  735    xon = 1 and xoff = 0, but in the centered case xon = 1 - 1/pd
  736    and xoff = -1/pd.
  737 */
  738 
  739 static int get_xon_xoff (const double *x, int n, int pd, double *xon, double *xoff)
  740 {
  741     double cfac = 1.0 / pd;
  742     double xc = 1.0 - cfac, yc = -cfac;
  743     double x0 = 999, y0 = 999;
  744     int t, ret = 1;
  745 
  746     for (t=0; t<n && ret; t++) {
  747     if (x[t] == 1.0) {
  748         if (x0 == 999) x0 = 1.0;
  749         else if (x[t] != x0) ret = 0;
  750     } else if (x[t] == 0.0) {
  751         if (y0 == 999) y0 = 0.0;
  752         else if (x[t] != y0) ret = 0;
  753     } else if (x[t] == xc) {
  754         if (x0 == 999) x0 = xc;
  755         else if (x[t] != x0) ret = 0;
  756     } else if (x[t] == yc) {
  757         if (y0 == 999) y0 = yc;
  758         else if (x[t] != y0) ret = 0;
  759     } else {
  760         ret = 0;
  761     }
  762     }
  763 
  764     if (ret) {
  765     *xon = x0;
  766     *xoff = y0;
  767     }
  768 
  769     return ret;
  770 }
  771 
  772 static int real_periodic_dummy (const double *x, int n,
  773                 int *pd, int *offset,
  774                 double *pxon, double *pxoff)
  775 {
  776     double xon = 1.0, xoff = 0.0;
  777     int onbak = 0;
  778     int gap = 0;
  779     int trail = 0;
  780     int t, m = n - 1, ret = 1;
  781 
  782     if (!get_xon_xoff(x, n, *pd, &xon, &xoff)) {
  783     return 0;
  784     }
  785 
  786     *pd = -1;
  787     *offset = -1;
  788     trail = 0;
  789 
  790     /* find number of trailing "off" values */
  791     for (t=n-1; t>0; t--) {
  792     if (x[t] == xoff) {
  793         trail++;
  794     } else {
  795         if (x[t] == xon) {
  796         m = t;
  797         } else {
  798         ret = 0;
  799         }
  800         break;
  801     }
  802     }
  803 
  804     /* check for dummyhood and periodicity */
  805     for (t=0; t<=m && ret; t++) {
  806     if (x[t] == xoff) {
  807         onbak = 0;
  808         gap++;
  809     } else if (x[t] == xon) {
  810         if (onbak) {
  811         ret = 0;
  812         } else if (*offset < 0) {
  813         *offset = gap;
  814         } else if (*pd < 0) {
  815         *pd = gap + 1;
  816         if (*pd < *offset + 1) {
  817             ret = 0;
  818         }
  819         } else if (gap != *pd - 1) {
  820         ret = 0;
  821         } else if (gap < trail) {
  822         ret = 0;
  823         }
  824         gap = 0;
  825         onbak = 1;
  826     } else {
  827         ret = 0;
  828         break;
  829     }
  830     }
  831 
  832     if (ret && pxon != NULL && pxoff != NULL) {
  833     *pxon = xon;
  834     *pxoff = xoff;
  835     }
  836 
  837     return ret;
  838 }
  839 
  840 /**
  841  * is_periodic_dummy:
  842  * @x: array to examine.
  843  * @dset: pointer to dataset.
  844  *
  845  * Returns: 1 if @x is a periodic dummy variable,
  846  * 0 otherwise.
  847  */
  848 
  849 int is_periodic_dummy (const double *x, const DATASET *dset)
  850 {
  851     int offset, pd = dset->pd;
  852 
  853     return real_periodic_dummy(x, dset->n, &pd, &offset, NULL, NULL);
  854 }
  855 
  856 static int is_linear_trend (const double *x, int n)
  857 {
  858     int t, ret = 1;
  859 
  860     for (t=1; t<n; t++) {
  861     if (x[t] != x[t-1] + 1.0) {
  862         ret = 0;
  863         break;
  864     }
  865     }
  866 
  867     return ret;
  868 }
  869 
  870 static int is_quadratic_trend (const double *x, int n)
  871 {
  872     double t2;
  873     int t, ret = 1;
  874 
  875     for (t=0; t<n; t++) {
  876     t2 = (t + 1) * (t + 1);
  877     if (x[t] != t2) {
  878         ret = 0;
  879         break;
  880     }
  881     }
  882 
  883     return ret;
  884 }
  885 
  886 /**
  887  * is_trend_variable:
  888  * @x: array to examine.
  889  * @n: number of elements in array.
  890  *
  891  * Returns: 1 if @x is a simple linear trend variable, with each
  892  * observation equal to the preceding observation plus 1, or
  893  * if @x is a quadratic trend starting at 1 for the first
  894  * observation in the data set, and 0 otherwise.
  895  */
  896 
  897 int is_trend_variable (const double *x, int n)
  898 {
  899     int ret = 0;
  900 
  901     if (is_linear_trend(x, n)) {
  902     ret = 1;
  903     } else if (is_quadratic_trend(x, n)) {
  904     ret = 1;
  905     }
  906 
  907     return ret;
  908 }
  909 
  910 static void maybe_extend_trends (DATASET *dset, int oldn)
  911 {
  912     int i, t;
  913 
  914     for (i=1; i<dset->v; i++) {
  915     if (is_linear_trend(dset->Z[i], oldn)) {
  916         for (t=oldn; t<dset->n; t++) {
  917         dset->Z[i][t] = dset->Z[i][t-1] + 1.0;
  918         }
  919     } else if (is_quadratic_trend(dset->Z[i], oldn)) {
  920         for (t=oldn; t<dset->n; t++) {
  921         dset->Z[i][t] = (t + 1) * (t + 1);
  922         }
  923     }
  924     }
  925 }
  926 
  927 static void maybe_extend_dummies (DATASET *dset, int oldn)
  928 {
  929     int pd = dset->pd;
  930     double xon = 1.0, xoff = 0.0;
  931     int offset;
  932     int i, t;
  933 
  934     for (i=1; i<dset->v; i++) {
  935     if (real_periodic_dummy(dset->Z[i], oldn, &pd, &offset, &xon, &xoff)) {
  936         for (t=oldn; t<dset->n; t++) {
  937         dset->Z[i][t] = ((t - offset) % pd)? xoff : xon;
  938         }
  939     }
  940     }
  941 }
  942 
  943 /* regular, not panel-time, version */
  944 
  945 static int real_dataset_add_observations (DATASET *dset, int n,
  946                       gretlopt opt)
  947 {
  948     double *x;
  949     int oldn = dset->n;
  950     int i, t, bign;
  951     int err = 0;
  952 
  953     if (dset_zcols_borrowed(dset)) {
  954     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
  955     return E_DATA;
  956     }
  957 
  958     if (n <= 0) {
  959     return 0;
  960     }
  961 
  962     if (dataset_is_panel(dset) && n % dset->pd != 0) {
  963     return E_PDWRONG;
  964     }
  965 
  966     bign = oldn + n;
  967 
  968     for (i=0; i<dset->v; i++) {
  969     x = realloc(dset->Z[i], bign * sizeof *x);
  970     if (x == NULL) {
  971         return E_ALLOC;
  972     }
  973     dset->Z[i] = x;
  974     for (t=oldn; t<bign; t++) {
  975         dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
  976     }
  977     }
  978 
  979     if (dataset_has_markers(dset)) {
  980     if (opt & OPT_D) {
  981         dataset_destroy_obs_markers(dset);
  982     } else {
  983         if (reallocate_markers(dset, bign)) {
  984         return E_ALLOC;
  985         }
  986         for (t=oldn; t<bign; t++) {
  987         sprintf(dset->S[t], "%d", t + 1);
  988         }
  989     }
  990     }
  991 
  992     if (dset->t2 == dset->n - 1) {
  993     dset->t2 = bign - 1;
  994     }
  995 
  996     dataset_set_nobs(dset, bign);
  997 
  998     if (opt & OPT_A) {
  999     maybe_extend_trends(dset, oldn);
 1000     maybe_extend_dummies(dset, oldn);
 1001     }
 1002 
 1003     /* does daily data need special handling? */
 1004     ntolabel(dset->endobs, bign - 1, dset);
 1005 
 1006     return err;
 1007 }
 1008 
 1009 static int panel_dataset_extend_time (DATASET *dset, int n)
 1010 {
 1011     double *utmp, *vtmp;
 1012     char **S = NULL;
 1013     int newT, oldT = dset->pd;
 1014     int oldn = dset->n;
 1015     int n_units;
 1016     int i, j, s, t, bign;
 1017     size_t usz, vsz;
 1018     int err = 0;
 1019 
 1020     if (!dataset_is_panel(dset)) {
 1021     return E_PDWRONG;
 1022     }
 1023 
 1024     if (dset_zcols_borrowed(dset)) {
 1025     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1026     return E_DATA;
 1027     }
 1028 
 1029     if (n <= 0) {
 1030     return 0;
 1031     }
 1032 
 1033     n_units = oldn / oldT;
 1034     newT = oldT + n;
 1035     bign = n_units * newT;
 1036 
 1037     usz = newT * sizeof *utmp;
 1038     vsz = bign * sizeof *vtmp;
 1039 
 1040     utmp = malloc(usz);
 1041     if (utmp == NULL) {
 1042     return E_ALLOC;
 1043     }
 1044 
 1045     if (dataset_has_markers(dset)) {
 1046     S = strings_array_new_with_length(bign, OBSLEN);
 1047     if (S == NULL) {
 1048         free(utmp);
 1049         return E_ALLOC;
 1050     }
 1051     }
 1052 
 1053     for (i=0; i<dset->v; i++) {
 1054     int uconst = 1, utrend = 1, dtrend = 1;
 1055     double xbak = NADBL;
 1056     guint32 dt = 0, dbak = 0;
 1057     int ed_err;
 1058 
 1059     vtmp = malloc(vsz);
 1060     if (vtmp == NULL) {
 1061         err = E_ALLOC;
 1062         goto bailout;
 1063     }
 1064 
 1065     s = 0;
 1066     for (j=0; j<n_units; j++) {
 1067         for (t=0; t<oldT; t++) {
 1068         utmp[t] = dset->Z[i][s++];
 1069         if (dtrend) {
 1070             dt = epoch_day_from_ymd_basic(utmp[t]);
 1071         }
 1072         if (t == 0) {
 1073             xbak = utmp[t];
 1074             dbak = dt;
 1075         } else {
 1076             if (uconst && (utmp[t] != xbak)) {
 1077             uconst = 0;
 1078             }
 1079             if (utrend && (utmp[t] != xbak + 1)) {
 1080             utrend = 0;
 1081             }
 1082             if (dtrend && (dt != dbak + 1)) {
 1083             dtrend = 0;
 1084             }
 1085         }
 1086         xbak = utmp[t];
 1087         dbak = dt;
 1088         }
 1089         for (t=oldT; t<newT; t++) {
 1090         if (i == 0) {
 1091             utmp[t] = 1.0;
 1092         } else if (uconst) {
 1093             utmp[t] = utmp[t-1];
 1094         } else if (utrend) {
 1095             utmp[t] = utmp[t-1] + 1;
 1096         } else if (dtrend) {
 1097             dt = epoch_day_from_ymd_basic(utmp[t-1]);
 1098             utmp[t] = ymd_basic_from_epoch_day(dt+1, 0, &ed_err);
 1099         } else {
 1100             utmp[t] = NADBL;
 1101         }
 1102         }
 1103         memcpy(vtmp + j*newT, utmp, usz);
 1104     }
 1105     free(dset->Z[i]);
 1106     dset->Z[i] = vtmp;
 1107     }
 1108 
 1109     if (S != NULL) {
 1110     int k = 0;
 1111 
 1112     s = 0;
 1113     for (j=0; j<n_units; j++) {
 1114         for (t=0; t<newT; t++) {
 1115         if (t < oldT) {
 1116             strcpy(S[k], dset->S[s++]);
 1117         } else {
 1118             sprintf(S[k], "%d:%d", j+1, t+1);
 1119         }
 1120         k++;
 1121         }
 1122     }
 1123     strings_array_free(dset->S, oldn);
 1124     dset->S = S;
 1125     S = NULL;
 1126     }
 1127 
 1128     if (dset->t2 == dset->n - 1) {
 1129     dset->t2 = bign - 1;
 1130     }
 1131 
 1132     dataset_set_nobs(dset, bign);
 1133     dset->pd = newT;
 1134     ntolabel(dset->endobs, bign - 1, dset);
 1135 
 1136  bailout:
 1137 
 1138     free(utmp);
 1139     if (S != NULL) {
 1140     strings_array_free(S, bign);
 1141     }
 1142 
 1143     return err;
 1144 }
 1145 
 1146 /**
 1147  * dataset_add_observations:
 1148  * @dset: pointer to dataset.
 1149  * @n: number of observations to add.
 1150  * @opt: use OPT_A to attempt to recognize and
 1151  * automatically extend simple deterministic variables such
 1152  * as a time trend and periodic dummy variables;
 1153  * use OPT_D to drop any observation markers rather than
 1154  * expanding the set of markers and padding it out with
 1155  * dummy values; use OPT_T to extend in the time dimension
 1156  * in the case of panel data.
 1157  *
 1158  * Extends all series in the dataset by the specified number of
 1159  * extra observations.  The added values are initialized to
 1160  * the missing value code, #NADBL, with the exception of
 1161  * simple deterministic variables when OPT_A is given.
 1162  *
 1163  * Returns: 0 on success, non-zero code on error.
 1164  */
 1165 
 1166 int dataset_add_observations (DATASET *dset, int n, gretlopt opt)
 1167 {
 1168     if (opt & OPT_T) {
 1169     return panel_dataset_extend_time(dset, n);
 1170     } else {
 1171     return real_dataset_add_observations(dset, n, opt);
 1172     }
 1173 }
 1174 
 1175 static int real_insert_observation (int pos, DATASET *dset)
 1176 {
 1177     double *x;
 1178     int n = dset->n + 1;
 1179     int i, t;
 1180     int err = 0;
 1181 
 1182     for (i=0; i<dset->v; i++) {
 1183     x = realloc(dset->Z[i], n * sizeof *x);
 1184     if (x == NULL) {
 1185         return E_ALLOC;
 1186     }
 1187     dset->Z[i] = x;
 1188     for (t=dset->n; t>pos; t--) {
 1189         dset->Z[i][t] = dset->Z[i][t-1];
 1190     }
 1191     dset->Z[i][pos] = (i == 0)? 1.0 : NADBL;
 1192     }
 1193 
 1194     if (dataset_has_markers(dset)) {
 1195     if (reallocate_markers(dset, n)) {
 1196         return E_ALLOC;
 1197     }
 1198     for (t=dset->n; t>pos; t--) {
 1199         strcpy(dset->S[t], dset->S[t-1]);
 1200     }
 1201     sprintf(dset->S[pos], "%d", pos + 1);
 1202     }
 1203 
 1204     if (dset->t2 == dset->n - 1) {
 1205     dset->t2 = n - 1;
 1206     }
 1207 
 1208     dataset_set_nobs(dset, n);
 1209     ntolabel(dset->endobs, n - 1, dset);
 1210 
 1211     return err;
 1212 }
 1213 
 1214 /**
 1215  * dataset_drop_observations:
 1216  * @dset: pointer to dataset.
 1217  * @n: number of observations to drop.
 1218  *
 1219  * Deletes @n observations from the end of each series in the
 1220  * dataset.
 1221  *
 1222  * Returns: 0 on success, non-zero code on error.
 1223  */
 1224 
 1225 int dataset_drop_observations (DATASET *dset, int n)
 1226 {
 1227     double *x;
 1228     int i, newn;
 1229 
 1230     if (dset_zcols_borrowed(dset)) {
 1231     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1232     return E_DATA;
 1233     }
 1234 
 1235     if (n <= 0) {
 1236     return 0;
 1237     }
 1238 
 1239     if (dataset_is_panel(dset) && n % dset->pd != 0) {
 1240     return E_PDWRONG;
 1241     }
 1242 
 1243     newn = dset->n - n;
 1244 
 1245     if (newn == 0) {
 1246     free_Z(dset);
 1247     clear_datainfo(dset, CLEAR_FULL);
 1248     return 0;
 1249     }
 1250 
 1251     for (i=0; i<dset->v; i++) {
 1252     x = realloc(dset->Z[i], newn * sizeof *x);
 1253     if (x == NULL) {
 1254         return E_ALLOC;
 1255     }
 1256     dset->Z[i] = x;
 1257     }
 1258 
 1259     if (dataset_has_markers(dset)) {
 1260     if (reallocate_markers(dset, newn)) {
 1261         return E_ALLOC;
 1262     }
 1263     }
 1264 
 1265     if (dset->t2 > newn - 1) {
 1266     dset->t2 = newn - 1;
 1267     }
 1268 
 1269     dataset_set_nobs(dset, newn);
 1270 
 1271     /* does daily data need special handling? */
 1272     ntolabel(dset->endobs, newn - 1, dset);
 1273 
 1274     return 0;
 1275 }
 1276 
 1277 /**
 1278  * dataset_shrink_obs_range:
 1279  * @dset: pointer to dataset.
 1280  *
 1281  * Truncates the range of observations in the dataset, based on
 1282  * the current values of the t1 and t2 members of @dset.
 1283  *
 1284  * Returns: 0 on success, non-zero code on error.
 1285  */
 1286 
 1287 int dataset_shrink_obs_range (DATASET *dset)
 1288 {
 1289     int offset = dset->t1;
 1290     int newn = dset->t2 - dset->t1 + 1;
 1291     int tail = dset->n - newn;
 1292     int err = 0;
 1293 
 1294     if (dset_zcols_borrowed(dset)) {
 1295     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1296     return E_DATA;
 1297     }
 1298 
 1299     if (offset > 0) {
 1300     /* If the revised dataset starts at an offset into
 1301        the original, shift each series back to the start of
 1302        its Z[i] array.
 1303     */
 1304     int i, mvsize;
 1305 
 1306     mvsize = newn * sizeof **dset->Z;
 1307     for (i=0; i<dset->v; i++) {
 1308         memmove(dset->Z[i], dset->Z[i] + offset, mvsize);
 1309     }
 1310 
 1311     if (dataset_has_markers(dset)) {
 1312         for (i=0; i<offset; i++) {
 1313         free(dset->S[i]);
 1314         }
 1315         mvsize = newn * sizeof *dset->S;
 1316         memmove(dset->S, dset->S + offset, mvsize);
 1317     }
 1318 
 1319     if (dset->structure == CROSS_SECTION) {
 1320         ntolabel(dset->stobs, 0, dset);
 1321     } else {
 1322         /* FIXME panel? */
 1323         ntolabel(dset->stobs, dset->t1, dset);
 1324         dset->sd0 = get_date_x(dset->pd, dset->stobs);
 1325     }
 1326 
 1327     dset->t1 = 0;
 1328     }
 1329 
 1330     err = dataset_drop_observations(dset, tail);
 1331 
 1332     return err;
 1333 }
 1334 
 1335 static int
 1336 dataset_expand_varinfo (int v0, int newvars, DATASET *dset)
 1337 {
 1338     char **varname = NULL;
 1339     VARINFO **varinfo = NULL;
 1340     int bigv = v0 + newvars;
 1341     int i, v, err = 0;
 1342 
 1343     varname = realloc(dset->varname, bigv * sizeof *varname);
 1344     if (varname == NULL) {
 1345     err = E_ALLOC;
 1346     } else {
 1347     dset->varname = varname;
 1348     }
 1349 
 1350     for (i=0; i<newvars && !err; i++) {
 1351     v = v0 + i;
 1352     dset->varname[v] = malloc(VNAMELEN);
 1353     if (dset->varname[v] == NULL) {
 1354         err = E_ALLOC;
 1355     } else {
 1356         dset->varname[v][0] = '\0';
 1357     }
 1358     }
 1359 
 1360     if (!err && dset->varinfo != NULL) {
 1361     varinfo = realloc(dset->varinfo, bigv * sizeof *varinfo);
 1362     if (varinfo == NULL) {
 1363         err = E_ALLOC;
 1364     } else {
 1365         dset->varinfo = varinfo;
 1366     }
 1367     for (i=0; i<newvars && !err; i++) {
 1368         v = v0 + i;
 1369         dset->varinfo[v] = malloc(sizeof **varinfo);
 1370         if (dset->varinfo[v] == NULL) {
 1371         err = E_ALLOC;
 1372         } else {
 1373         gretl_varinfo_init(dset->varinfo[v]);
 1374         }
 1375     }
 1376     }
 1377 
 1378     return err;
 1379 }
 1380 
 1381 /* note: values of series newly added here are left uninitialized:
 1382    that is the responsibility of the caller
 1383 */
 1384 
 1385 static int real_add_series (int newvars, double *x,
 1386                 DATASET *dset)
 1387 {
 1388     double **newZ;
 1389     int v0 = dset->v;
 1390     int i, err = 0;
 1391 
 1392     if (newvars == 0) {
 1393     /* no-op */
 1394     return 0;
 1395     }
 1396 
 1397     newZ = realloc(dset->Z, (v0 + newvars) * sizeof *newZ);
 1398 
 1399 #if DDEBUG
 1400     fprintf(stderr, "real_add_series: add %d vars, Z = %p\n",
 1401         newvars, (void *) newZ);
 1402 #endif
 1403 
 1404     if (newZ == NULL) {
 1405     err = E_ALLOC;
 1406     } else {
 1407     dset->Z = newZ;
 1408     }
 1409 
 1410     if (!err) {
 1411     if (newvars == 1 && x != NULL) {
 1412         /* a single new var, storage pre-allocated */
 1413         newZ[v0] = x;
 1414     } else {
 1415         for (i=0; i<newvars && !err; i++) {
 1416         newZ[v0+i] = malloc(dset->n * sizeof **newZ);
 1417         if (newZ[v0+i] == NULL) {
 1418             err = E_ALLOC;
 1419         }
 1420         }
 1421     }
 1422     }
 1423 
 1424     if (!err && dset != fetch_full_dataset()) {
 1425     /* don't expand varinfo if we're adding a series
 1426        to the full dataset when currently sub-sampled,
 1427        since in that case varinfo is shared between
 1428        the two datasets
 1429     */
 1430     err = dataset_expand_varinfo(v0, newvars, dset);
 1431     }
 1432 
 1433     if (!err) {
 1434     dset->v += newvars;
 1435     }
 1436 
 1437     return err;
 1438 }
 1439 
 1440 /**
 1441  * dataset_add_series:
 1442  * @dset: pointer to dataset.
 1443  * @newvars: number of series to add.
 1444  *
 1445  * Adds space for the specified number of additional series
 1446  * in the dataset. Values are initialized to zero.
 1447  *
 1448  * Returns: 0 on success, E_ALLOC on error.
 1449  */
 1450 
 1451 int dataset_add_series (DATASET *dset, int newvars)
 1452 {
 1453     int v0 = dset->v;
 1454     int err;
 1455 
 1456     if (dset_zcols_borrowed(dset)) {
 1457     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1458     return E_DATA;
 1459     }
 1460 
 1461     err = real_add_series(newvars, NULL, dset);
 1462 
 1463     if (!err) {
 1464     int i, v, t;
 1465 
 1466     for (i=0; i<newvars; i++) {
 1467         v = v0 + i;
 1468         for (t=0; t<dset->n; t++) {
 1469         dset->Z[v][t] = 0.0;
 1470         }
 1471     }
 1472     }
 1473 
 1474     return err;
 1475 }
 1476 
 1477 /**
 1478  * dataset_add_NA_series:
 1479  * @dset: pointer to dataset.
 1480  * @newvars: number of series to add.
 1481  *
 1482  * Adds space for the specified number of additional series
 1483  * in the dataset. Values are initialized to NA.
 1484  *
 1485  * Returns: 0 on success, E_ALLOC on error.
 1486  */
 1487 
 1488 int dataset_add_NA_series (DATASET *dset, int newvars)
 1489 {
 1490     int v0 = dset->v;
 1491     int err;
 1492 
 1493     if (dset_zcols_borrowed(dset)) {
 1494     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1495     return E_DATA;
 1496     }
 1497 
 1498     err = real_add_series(newvars, NULL, dset);
 1499 
 1500     if (!err) {
 1501     int i, v, t;
 1502 
 1503     for (i=0; i<newvars; i++) {
 1504         v = v0 + i;
 1505         for (t=0; t<dset->n; t++) {
 1506         dset->Z[v][t] = NADBL;
 1507         }
 1508     }
 1509     }
 1510 
 1511     return err;
 1512 }
 1513 
 1514 /**
 1515  * dataset_add_allocated_series:
 1516  * @dset: pointer to dataset.
 1517  * @x: one-dimensional data array.
 1518  *
 1519  * Adds @x as an additional series in the dataset.
 1520  * The array @x is not copied; it should be treated as
 1521  * belonging to @dset after this operation.
 1522  *
 1523  * Returns: 0 on success, E_ALLOC on error.
 1524  */
 1525 
 1526 int dataset_add_allocated_series (DATASET *dset, double *x)
 1527 {
 1528     if (dset_zcols_borrowed(dset)) {
 1529     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1530     return E_DATA;
 1531     } else {
 1532     return real_add_series(1, x, dset);
 1533     }
 1534 }
 1535 
 1536 /**
 1537  * dataset_add_series_as:
 1538  * @dset: pointer to dataset.
 1539  * @x: array to be added.
 1540  * @name: name to give the new variable.
 1541  *
 1542  * Adds to the dataset a new series with name @name and
 1543  * values given by @x.  The new variable is added at one
 1544  * level "deeper" (in terms of function execution) than the
 1545  * current level.  This is for use with user-defined functions.
 1546  *
 1547  * Returns: 0 on success, E_ALLOC on error.
 1548  */
 1549 
 1550 int dataset_add_series_as (DATASET *dset, double *x, const char *name)
 1551 {
 1552     int v, t, err = 0;
 1553 
 1554     if (dset_zcols_borrowed(dset)) {
 1555     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 1556     return E_DATA;
 1557     }
 1558 
 1559     if (dset->varinfo == NULL) {
 1560     gretl_errmsg_set(_("Please open a data file first"));
 1561     return 1;
 1562     }
 1563 
 1564 #if DDEBUG
 1565     fprintf(stderr, "dataset_add_series_as: incoming Z=%p, name='%s'\n",
 1566         (void *) dset->Z, name);
 1567 #endif
 1568 
 1569     err = real_add_series(1, NULL, dset);
 1570 
 1571     if (!err) {
 1572     v = dset->v - 1;
 1573     for (t=0; t<dset->n; t++) {
 1574         dset->Z[v][t] = x[t];
 1575     }
 1576     strcpy(dset->varname[v], name);
 1577     dset->varinfo[v]->stack_level += 1;
 1578     }
 1579 
 1580     return err;
 1581 }
 1582 
 1583 /**
 1584  * dataset_copy_series_as:
 1585  * @dset: pointer to dataset.
 1586  * @v: index number of variable to copy.
 1587  * @name: name to give the copy.
 1588  *
 1589  * Makes a copy of series @v under the name @name.
 1590  * The copy exists in a variable namespace one level "deeper"
 1591  * (in terms of function execution) than the variable being copied.
 1592  * This is for use with user-defined functions: a variable
 1593  * supplied to a function as an argument is copied into the
 1594  * function's namespace under the name it was given as a
 1595  * parameter.
 1596  *
 1597  * Returns: 0 on success, E_ALLOC on error.
 1598  */
 1599 
 1600 int dataset_copy_series_as (DATASET *dset, int v, const char *name)
 1601 {
 1602     int t, err;
 1603 
 1604     err = real_add_series(1, NULL, dset);
 1605 
 1606     if (!err) {
 1607     int vnew = dset->v - 1;
 1608 
 1609     for (t=0; t<dset->n; t++) {
 1610         dset->Z[vnew][t] = dset->Z[v][t];
 1611     }
 1612     strcpy(dset->varname[vnew], name);
 1613     copy_varinfo(dset->varinfo[vnew], dset->varinfo[v]);
 1614     if (dset->varinfo[v]->flags & VAR_LISTARG) {
 1615         dset->varinfo[vnew]->flags &= ~VAR_LISTARG;
 1616     }
 1617     dset->varinfo[vnew]->stack_level = gretl_function_depth() + 1;
 1618 #if 0
 1619     fprintf(stderr, "copied var %d ('%s', level %d) as var %d ('%s', level %d): ",
 1620         v, dset->varname[v], dset->varinfo[v]->stack_level,
 1621         vnew, name, dset->varinfo[vnew]->stack_level);
 1622     fprintf(stderr, "Z[%d][0] = %g\n", vnew, dset->Z[vnew][0]);
 1623 #endif
 1624     }
 1625 
 1626     return err;
 1627 }
 1628 
 1629 enum {
 1630     DROP_NORMAL,
 1631     DROP_SPECIAL
 1632 };
 1633 
 1634 /* DROP_SPECIAL is used when deleting variables from the "full" shadow
 1635    of a sub-sampled dataset, after deleting those same variables from
 1636    the sub-sampled version: in that case we don't mess with the
 1637    pointer elements of the DATASET struct, because these are shared
 1638    between the full and sub-sampled versions.
 1639 */
 1640 
 1641 static int shrink_dataset_to_size (DATASET *dset, int nv, int drop)
 1642 {
 1643     double **newZ;
 1644 
 1645 #if DDEBUG
 1646     fprintf(stderr, "shrink_dataset_to_size: dset at %p, dset->v=%d, nv=%d\n"
 1647         " drop = %s\n", (void *) dset, dset->v, nv,
 1648         (drop == DROP_NORMAL)? "DROP_NORMAL" : "DROP_SPECIAL");
 1649 #endif
 1650 
 1651     if (drop == DROP_NORMAL) {
 1652     char **varname;
 1653     VARINFO **varinfo;
 1654 
 1655     varname = realloc(dset->varname, nv * sizeof *varname);
 1656     if (varname == NULL) {
 1657         return E_ALLOC;
 1658     }
 1659     dset->varname = varname;
 1660 
 1661     varinfo = realloc(dset->varinfo, nv * sizeof *varinfo);
 1662     if (varinfo == NULL) {
 1663         return E_ALLOC;
 1664     }
 1665     dset->varinfo = varinfo;
 1666     }
 1667 
 1668     newZ = realloc(dset->Z, nv * sizeof *newZ);
 1669     if (newZ == NULL) {
 1670     return E_ALLOC;
 1671     }
 1672 
 1673     dset->Z = newZ;
 1674     dset->v = nv;
 1675 
 1676     return 0;
 1677 }
 1678 
 1679 static int vars_renumbered (const int *list, DATASET *dset,
 1680                 int dmin)
 1681 {
 1682     int i, ndel = 0;
 1683 
 1684     for (i=dmin; i<dset->v; i++) {
 1685     if (in_gretl_list(list, i)) {
 1686         ndel++;
 1687     } else if (ndel > 0 && !series_is_hidden(dset, i)) {
 1688         return 1;
 1689     }
 1690     }
 1691 
 1692     return 0;
 1693 }
 1694 
 1695 int overwrite_err (const char *name)
 1696 {
 1697     gretl_errmsg_sprintf(_("The variable %s is read-only"), name);
 1698 
 1699     return E_DATA;
 1700 }
 1701 
 1702 /**
 1703  * series_is_parent:
 1704  * @dset: dataset information.
 1705  * @v: ID number of series to test.
 1706  *
 1707  * Returns: 1 if variable @v is "parent" to a transformed
 1708  * variable (e.g. a log, lag or difference), othewise 0.
 1709  */
 1710 
 1711 int series_is_parent (const DATASET *dset, int v)
 1712 {
 1713     const char *s = dset->varname[v];
 1714     int i;
 1715 
 1716     if (*s == '\0') {
 1717     return 0;
 1718     }
 1719 
 1720     for (i=1; i<dset->v; i++) {
 1721     if (i != v && !strcmp(s, dset->varinfo[i]->parent)) {
 1722         return 1;
 1723     }
 1724     }
 1725 
 1726     return 0;
 1727 }
 1728 
 1729 /**
 1730  * dataset_rename_series:
 1731  * @dset: dataset information.
 1732  * @v: ID number of the series to be renamed.
 1733  * @name: new name to give the series.
 1734  *
 1735  * Returns: 0 on success, non-zero on error.
 1736  */
 1737 
 1738 int dataset_rename_series (DATASET *dset, int v, const char *name)
 1739 {
 1740     int err = 0;
 1741 
 1742     if (v <= 0 || v >= dset->v || name == NULL) {
 1743     err = E_DATA;
 1744     } else {
 1745     err = check_varname(name);
 1746     }
 1747 
 1748     if (!err) {
 1749     GretlType type;
 1750 
 1751     type = user_var_get_type_by_name(name);
 1752     if (type != GRETL_TYPE_NONE) {
 1753         gretl_errmsg_set("There is already an object of this name");
 1754         err = E_DATA;
 1755     }
 1756     }
 1757 
 1758     if (!err && current_series_index(dset, name) >= 0) {
 1759     gretl_errmsg_set("There is already a series of this name");
 1760     err = E_DATA;
 1761     }
 1762 
 1763     if (!err && (object_is_const(dset->varname[v], v) ||
 1764          series_is_parent(dset, v))) {
 1765     err = overwrite_err(dset->varname[v]);
 1766     }
 1767 
 1768     if (!err && strcmp(dset->varname[v], name)) {
 1769     dset->varname[v][0] = '\0';
 1770     strncat(dset->varname[v], name, VNAMELEN-1);
 1771     set_dataset_is_changed(dset, 1);
 1772     }
 1773 
 1774     return err;
 1775 }
 1776 
 1777 /**
 1778  * dataset_replace_series:
 1779  * @dset: pointer to dataset.
 1780  * @v: ID number of the series to be replaced.
 1781  * @x: replacement values.
 1782  * @descrip: replacement description.
 1783  * @flag: if = DS_GRAB_VALUES then replace dset->Z[@v]
 1784  * with @x, otherwise copy the values in @x to dset->Z[@v].
 1785  *
 1786  * Replaces the description and numerical content of
 1787  * series @v with the information provided.
 1788  *
 1789  * Returns: 0 on success, non-zero on error.
 1790  */
 1791 
 1792 int dataset_replace_series (DATASET *dset, int v,
 1793                 double *x, const char *descrip,
 1794                 DataCopyFlag flag)
 1795 {
 1796     if (v < 0 || v >= dset->v) {
 1797     /* out of bounds */
 1798     return E_DATA;
 1799     }
 1800 
 1801     if (object_is_const(dset->varname[v], v) ||
 1802     series_is_parent(dset, v)) {
 1803     return overwrite_err(dset->varname[v]);
 1804     }
 1805 
 1806     gretl_varinfo_init(dset->varinfo[v]);
 1807     series_set_label(dset, v, descrip);
 1808 
 1809     if (flag == DS_GRAB_VALUES) {
 1810     free(dset->Z[v]);
 1811     dset->Z[v] = x;
 1812     } else {
 1813     int t;
 1814 
 1815     for (t=0; t<dset->n; t++) {
 1816         dset->Z[v][t] = x[t];
 1817     }
 1818     }
 1819 
 1820     set_dataset_is_changed(dset, 1);
 1821 
 1822     return 0;
 1823 }
 1824 
 1825 /**
 1826  * dataset_replace_series_data:
 1827  * @dset: pointer to dataset.
 1828  * @v: ID number of the series to be modified.
 1829  * @x: replacement values.
 1830  * @t1: start of sample range.
 1831  * @t2: end of sample range.
 1832  * @descrip: replacement description.
 1833  *
 1834  * Replaces the description and numerical content of
 1835  * series @v over the given sample range, with the
 1836  * information provided.
 1837  *
 1838  * Returns: 0 on success, non-zero on error.
 1839  */
 1840 
 1841 int dataset_replace_series_data (DATASET *dset, int v,
 1842                  const double *x,
 1843                  int t1, int t2,
 1844                  const char *descrip)
 1845 {
 1846     int t, s;
 1847 
 1848     if (v < 0 || v >= dset->v) {
 1849     /* out of bounds */
 1850     return E_DATA;
 1851     }
 1852 
 1853     if (object_is_const(dset->varname[v], v) ||
 1854     series_is_parent(dset, v)) {
 1855     return overwrite_err(dset->varname[v]);
 1856     }
 1857 
 1858     gretl_varinfo_init(dset->varinfo[v]);
 1859     series_set_label(dset, v, descrip);
 1860 
 1861     s = 0;
 1862     for (t=t1; t<=t2; t++) {
 1863     dset->Z[v][t] = x[s++];
 1864     }
 1865 
 1866     set_dataset_is_changed(dset, 1);
 1867 
 1868     return 0;
 1869 }
 1870 
 1871 static int real_drop_listed_vars (int *list, DATASET *dset,
 1872                   int *renumber, int drop,
 1873                   PRN *prn)
 1874 {
 1875     int oldv = dset->v, vmax = dset->v;
 1876     char vname[VNAMELEN] = {0};
 1877     int d0, d1;
 1878     int delmin = oldv;
 1879     int i, v, ndel = 0;
 1880     int err = 0;
 1881 
 1882     if (renumber != NULL) {
 1883     *renumber = 0;
 1884     }
 1885 
 1886     if (list == NULL || list[0] == 0) {
 1887     /* no-op */
 1888     return 0;
 1889     }
 1890 
 1891     d0 = list[0];
 1892 
 1893     check_variable_deletion_list(list, dset);
 1894     d1 = list[0];
 1895     if (prn != NULL && d1 == 1) {
 1896     strcpy(vname, dset->varname[list[1]]);
 1897     }
 1898 
 1899     if (d1 == 0) {
 1900     goto finish;
 1901     }
 1902 
 1903 #if DDEBUG
 1904     fprintf(stderr, "real_drop_listed_variables: dropping %d vars:\n",
 1905         list[0]);
 1906 #endif
 1907 
 1908     /* check that no vars to be deleted are marked "const", and do
 1909        some preliminary accounting while we're at it */
 1910 
 1911     for (i=1; i<=list[0]; i++) {
 1912     v = list[i];
 1913     if (v > 0 && v < oldv) {
 1914         if (object_is_const(dset->varname[v], v)) {
 1915         return overwrite_err(dset->varname[v]);
 1916         }
 1917         if (v < delmin) {
 1918         delmin = v;
 1919         }
 1920         ndel++;
 1921     }
 1922     }
 1923 
 1924     if (ndel == 0) {
 1925     return 0;
 1926     }
 1927 
 1928     if (renumber != NULL) {
 1929     *renumber = vars_renumbered(list, dset, delmin);
 1930     }
 1931 
 1932 #if DDEBUG
 1933     fprintf(stderr, "real_drop_listed_variables: lowest ID of deleted var"
 1934         " = %d\n", delmin);
 1935 #endif
 1936 
 1937     /* free and set to NULL all the vars to be deleted */
 1938     for (i=1; i<=list[0]; i++) {
 1939     v = list[i];
 1940     if (v > 0 && v < oldv) {
 1941         free(dset->Z[v]);
 1942         dset->Z[v] = NULL;
 1943         if (drop == DROP_NORMAL) {
 1944         free(dset->varname[v]);
 1945         free(dset->varinfo[v]);
 1946         }
 1947     }
 1948     }
 1949 
 1950     /* repack pointers if necessary */
 1951 
 1952     for (v=1; v<vmax; v++) {
 1953     if (dset->Z[v] == NULL) {
 1954         int gap = 1;
 1955 
 1956         for (i=v+1; i<vmax; i++) {
 1957         if (dset->Z[i] == NULL) {
 1958             gap++;
 1959         } else {
 1960             break;
 1961         }
 1962         }
 1963 
 1964         if (i < vmax) {
 1965         vmax -= gap;
 1966         for (i=v; i<vmax; i++) {
 1967             if (drop == DROP_NORMAL) {
 1968             dset->varname[i] = dset->varname[i + gap];
 1969             dset->varinfo[i] = dset->varinfo[i + gap];
 1970             }
 1971             dset->Z[i] = dset->Z[i + gap];
 1972         }
 1973         } else {
 1974         /* deleting all subsequent vars: done */
 1975         break;
 1976         }
 1977     }
 1978     }
 1979 
 1980     err = shrink_dataset_to_size(dset, oldv - ndel, drop);
 1981 
 1982  finish:
 1983 
 1984     /* report results, if appropriate */
 1985 
 1986     if (!err && prn != NULL) {
 1987     if (d0 == d1) {
 1988         if (gretl_messages_on()) {
 1989         if (*vname != '\0') {
 1990             pprintf(prn, _("Deleted %s"), vname);
 1991         } else {
 1992             pprintf(prn, _("Deleted %d variables"), d1);
 1993         }
 1994         pputc(prn, '\n');
 1995         }
 1996     } else {
 1997         if (d1 == 0) {
 1998         pputs(prn, _("No variables were deleted"));
 1999         } else if (*vname != '\0') {
 2000         pprintf(prn, _("Deleted %s"), vname);
 2001         } else {
 2002         pprintf(prn, _("Deleted %d variables"), d1);
 2003         }
 2004         pprintf(prn, " (%s)\n", _("some data were in use"));
 2005     }
 2006     }
 2007 
 2008     return err;
 2009 }
 2010 
 2011 static int *make_dollar_list (DATASET *dset, int *err)
 2012 {
 2013     int *list = NULL;
 2014     int i;
 2015 
 2016     for (i=1; i<dset->v; i++) {
 2017     if (dset->varname[i][0] == '$') {
 2018         list = gretl_list_append_term(&list, i);
 2019         if (list == NULL) {
 2020         *err = E_ALLOC;
 2021         break;
 2022         }
 2023     }
 2024     }
 2025 
 2026     return list;
 2027 }
 2028 
 2029 /**
 2030  * dataset_drop_listed_variables:
 2031  * @list: list of variable to drop, by ID number.
 2032  * @dset: pointer to dataset.
 2033  * @renumber: location for return of information on whether
 2034  * remaining variables have been renumbered as a result, or
 2035  * NULL.
 2036  * @prn: pointer to printing struct.
 2037  *
 2038  * Deletes the variables given in @list from the dataset.  Remaining
 2039  * variables may have their ID numbers changed as a consequence. If
 2040  * @renumber is not NULL, this location receives 1 in case variables
 2041  * have been renumbered, 0 otherwise.
 2042  *
 2043  * Returns: 0 on success, E_ALLOC on error.
 2044  */
 2045 
 2046 int dataset_drop_listed_variables (int *list,
 2047                    DATASET *dset,
 2048                    int *renumber,
 2049                    PRN *prn)
 2050 {
 2051     int oldv = dset->v;
 2052     int *dlist = NULL;
 2053     int dupv, free_dlist = 0;
 2054     int err = 0;
 2055 
 2056     if (dset->n == 0 || dset->v == 0) {
 2057     return E_NODATA;
 2058     }
 2059 
 2060     if (dset_zcols_borrowed(dset)) {
 2061     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2062     return E_DATA;
 2063     }
 2064 
 2065     if (list == NULL) {
 2066     /* signal to drop internal "$" variables */
 2067     dlist = make_dollar_list(dset, &err);
 2068     if (err) {
 2069         return err;
 2070     } else if (dlist == NULL) {
 2071         /* no-op */
 2072         return 0;
 2073     }
 2074     free_dlist = 1;
 2075     } else if (list[0] == 0) {
 2076     /* no-op */
 2077     return 0;
 2078     } else {
 2079     dlist = list;
 2080     }
 2081 
 2082     dupv = gretl_list_duplicates(dlist, DELEET);
 2083     if (dupv >= 0) {
 2084     gretl_errmsg_sprintf(_("variable %d duplicated in the "
 2085                    "command list."), dupv);
 2086     return E_DATA;
 2087     }
 2088 
 2089     err = real_drop_listed_vars(dlist, dset, renumber,
 2090                 DROP_NORMAL, prn);
 2091 
 2092     if (dlist[0] > 0) {
 2093     if (!err && !dset->auxiliary) {
 2094         err = gretl_lists_revise(dlist, 0);
 2095     }
 2096 
 2097     if (!err && complex_subsampled()) {
 2098         DATASET *fdset = fetch_full_dataset();
 2099 
 2100         err = real_drop_listed_vars(dlist, fdset, NULL,
 2101                     DROP_SPECIAL, NULL);
 2102     }
 2103     }
 2104 
 2105     if (free_dlist) {
 2106     free(dlist);
 2107     } else if (dset->v != oldv) {
 2108     set_dataset_is_changed(dset, 1);
 2109     }
 2110 
 2111     return err;
 2112 }
 2113 
 2114 /**
 2115  * dataset_drop_variable:
 2116  * @v: ID number of variable to drop.
 2117  * @dset: pointer to dataset.
 2118  *
 2119  * Deletes variable @v from the dataset.
 2120  *
 2121  * Returns: 0 on success, E_ALLOC on error.
 2122  */
 2123 
 2124 int dataset_drop_variable (int v, DATASET *dset)
 2125 {
 2126     int list[2] = {1, v};
 2127 
 2128     if (v <= 0 || v >= dset->v) {
 2129     return E_DATA;
 2130     }
 2131 
 2132     if (dset_zcols_borrowed(dset)) {
 2133     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2134     return E_DATA;
 2135     }
 2136 
 2137     return dataset_drop_listed_variables(list, dset, NULL, NULL);
 2138 }
 2139 
 2140 /**
 2141  * dataset_renumber_variable:
 2142  * @v_old: original ID number of variable.
 2143  * @v_new: new ID number.
 2144  * @dset: dataset information.
 2145  *
 2146  * Moves the variable that was originally at position @v_old
 2147  * in the datset to position @v_new, renumbering other
 2148  * variables as required.
 2149  *
 2150  * Returns: 0 on success, error code on error;
 2151  */
 2152 
 2153 int dataset_renumber_variable (int v_old, int v_new,
 2154                    DATASET *dset)
 2155 {
 2156     double *x;
 2157     VARINFO *vinfo;
 2158     char vname[VNAMELEN];
 2159     int i;
 2160 
 2161     if (complex_subsampled()) {
 2162     /* too tricky */
 2163     gretl_errmsg_set(_("dataset is subsampled"));
 2164     return E_DATA;
 2165     }
 2166 
 2167     if (dset_zcols_borrowed(dset)) {
 2168     fprintf(stderr, "*** Internal error: modifying borrowed data\n");
 2169     return E_DATA;
 2170     }
 2171 
 2172     if (v_old < 1 || v_old > dset->v - 1 ||
 2173     v_new < 1 || v_new > dset->v - 1) {
 2174     /* out of bounds */
 2175     return E_DATA;
 2176     }
 2177 
 2178     if (v_old == v_new) {
 2179     /* no-op */
 2180     return 0;
 2181     }
 2182 
 2183     x = dset->Z[v_old];
 2184     vinfo = dset->varinfo[v_old];
 2185     strcpy(vname, dset->varname[v_old]);
 2186 
 2187     if (v_new < v_old) {
 2188     /* moving up in ordering */
 2189     for (i=v_old; i>v_new; i--) {
 2190         dset->Z[i] = dset->Z[i-1];
 2191         strcpy(dset->varname[i], dset->varname[i-1]);
 2192         dset->varinfo[i] = dset->varinfo[i-1];
 2193     }
 2194     } else {
 2195     /* moving down in ordering */
 2196     for (i=v_old; i<v_new; i++) {
 2197         dset->Z[i] = dset->Z[i+1];
 2198         strcpy(dset->varname[i], dset->varname[i+1]);
 2199         dset->varinfo[i] = dset->varinfo[i+1];
 2200     }
 2201     }
 2202 
 2203     dset->Z[v_new] = x;
 2204     strcpy(dset->varname[v_new], vname);
 2205     dset->varinfo[v_new] = vinfo;
 2206 
 2207     set_dataset_is_changed(dset, 1);
 2208 
 2209     return 0;
 2210 }
 2211 
 2212 /**
 2213  * dataset_destroy_hidden_variables:
 2214  * @dset: pointer to dataset.
 2215  * @vmin: do not drop variables with ID numbers less than this.
 2216  *
 2217  * Deletes from the dataset any "hidden" variables that have
 2218  * been added automatically (for example, auto-generated variables
 2219  * used for the x-axis in graph plotting), and that have ID
 2220  * numbers greater than or equal to @vmin.  Never deletes the
 2221  * automatically generated constant (ID number 0).
 2222  *
 2223  * Returns: 0 on success, E_ALLOC on error.
 2224  */
 2225 
 2226 int dataset_destroy_hidden_variables (DATASET *dset, int vmin)
 2227 {
 2228     int i, nhid = 0;
 2229     int err = 0;
 2230 
 2231     if (vmin <= 1) vmin = 1;
 2232 
 2233     for (i=vmin; i<dset->v; i++) {
 2234     if (series_is_hidden(dset, i)) {
 2235         nhid++;
 2236     }
 2237     }
 2238 
 2239     if (nhid > 0) {
 2240     int *list = gretl_list_new(nhid);
 2241 
 2242     if (list == NULL) {
 2243         err = 1;
 2244     } else {
 2245         int j = 1;
 2246 
 2247         for (i=vmin; i<dset->v; i++) {
 2248         if (series_is_hidden(dset, i)) {
 2249             list[j++] = i;
 2250         }
 2251         }
 2252         err = dataset_drop_listed_variables(list, dset, NULL, NULL);
 2253         free(list);
 2254     }
 2255     }
 2256 
 2257     return err;
 2258 }
 2259 
 2260 int dataset_set_matrix_name (DATASET *dset, const char *name)
 2261 {
 2262     int err = 0;
 2263 
 2264     if (dset->descrip != NULL) {
 2265     free(dset->descrip);
 2266     dset->descrip = NULL;
 2267     }
 2268 
 2269     if (name != NULL && *name != '\0') {
 2270     dset->descrip = malloc(strlen(name) + 8);
 2271     if (dset->descrip == NULL) {
 2272         err = E_ALLOC;
 2273     } else {
 2274         sprintf(dset->descrip, "matrix:%s", name);
 2275     }
 2276     }
 2277 
 2278     return err;
 2279 }
 2280 
 2281 const char *dataset_get_matrix_name (const DATASET *dset)
 2282 {
 2283     if (dset->descrip != NULL &&
 2284     strlen(dset->descrip) > 7 &&
 2285     !strncmp(dset->descrip, "matrix:", 7)) {
 2286     return dset->descrip + 7;
 2287     } else {
 2288     return NULL;
 2289     }
 2290 }
 2291 
 2292 const char *dataset_get_mapfile (const DATASET *dset)
 2293 {
 2294     return dset == NULL ? NULL : dset->mapfile;
 2295 }
 2296 
 2297 const char *dataset_period_label (const DATASET *dset)
 2298 {
 2299     if (dset == NULL) {
 2300     return _("periods");
 2301     } else if (quarterly_or_monthly(dset)) {
 2302     return dset->pd == 4 ? _("quarters") : _("months");
 2303     } else if (annual_data(dset)) {
 2304     return _("years");
 2305     } else if (dataset_is_weekly(dset)) {
 2306     return _("weeks");
 2307     } else if (dataset_is_daily(dset)) {
 2308     return _("days");
 2309     } else if (dataset_is_hourly(dset)) {
 2310     return _("hours");
 2311     } else {
 2312     return _("periods");
 2313     }
 2314 }
 2315 
 2316 /* intended for use with newly imported data: trash any
 2317    series that contain nothing but NAs
 2318 */
 2319 
 2320 int maybe_prune_dataset (DATASET **pdset, void *p)
 2321 {
 2322     DATASET *dset = *pdset;
 2323     int allmiss, prune = 0, err = 0;
 2324     int i, t;
 2325 
 2326     for (i=1; i<dset->v; i++) {
 2327     allmiss = 1;
 2328     for (t=0; t<dset->n; t++) {
 2329         if (!na(dset->Z[i][t])) {
 2330         allmiss = 0;
 2331         break;
 2332         }
 2333     }
 2334     if (allmiss) {
 2335         prune = 1;
 2336         break;
 2337     }
 2338     }
 2339 
 2340     if (prune) {
 2341     char *mask = calloc(dset->v, 1);
 2342     DATASET *newset = NULL;
 2343     int ndrop = 0;
 2344 
 2345     if (mask == NULL) {
 2346         return E_ALLOC;
 2347     }
 2348 
 2349     for (i=1; i<dset->v; i++) {
 2350         allmiss = 1;
 2351         for (t=0; t<dset->n; t++) {
 2352         if (!na(dset->Z[i][t])) {
 2353             allmiss = 0;
 2354             break;
 2355         }
 2356         }
 2357         if (allmiss) {
 2358         mask[i] = 1;
 2359         ndrop++;
 2360         }
 2361     }
 2362 
 2363     newset = datainfo_new();
 2364     if (newset == NULL) {
 2365         err = E_ALLOC;
 2366     } else {
 2367         newset->v = dset->v - ndrop;
 2368         newset->n = dset->n;
 2369         err = start_new_Z(newset, 0);
 2370     }
 2371 
 2372     if (!err) {
 2373         gretl_string_table *st = (gretl_string_table *) p;
 2374         size_t ssize = dset->n * sizeof **newset->Z;
 2375         int k = 1;
 2376 
 2377         for (i=1; i<dset->v; i++) {
 2378         if (!mask[i]) {
 2379             memcpy(newset->Z[k], dset->Z[i], ssize);
 2380             strcpy(newset->varname[k], dset->varname[i]);
 2381             copy_label(&newset->varinfo[k]->label,
 2382                    dset->varinfo[i]->label);
 2383             if (st != NULL && k < i) {
 2384             gretl_string_table_reset_column_id(st, i, k);
 2385             }
 2386             k++;
 2387         }
 2388         }
 2389 
 2390         destroy_dataset(dset);
 2391         *pdset = newset;
 2392 
 2393         fprintf(stderr, "pruned dataset to %d variables\n", newset->v);
 2394     }
 2395 
 2396     free(mask);
 2397     }
 2398 
 2399     return err;
 2400 }
 2401 
 2402 /* apparatus for sorting entire dataset */
 2403 
 2404 typedef struct spoint_t_ spoint_t;
 2405 
 2406 struct spoint_t_ {
 2407     int obsnum;
 2408     int nvals;
 2409     double *vals;
 2410 };
 2411 
 2412 static void free_spoints (spoint_t *sv, int n)
 2413 {
 2414     int i;
 2415 
 2416     for (i=0; i<n; i++) {
 2417     free(sv[i].vals);
 2418     }
 2419 
 2420     free(sv);
 2421 }
 2422 
 2423 static spoint_t *allocate_spoints (int n, int v)
 2424 {
 2425     spoint_t *sv;
 2426     int i;
 2427 
 2428     sv = malloc(n * sizeof *sv);
 2429 
 2430     if (sv != NULL) {
 2431     for (i=0; i<n; i++) {
 2432         sv[i].vals = NULL;
 2433     }
 2434     for (i=0; i<n; i++) {
 2435         sv[i].vals = malloc(v * sizeof(double));
 2436         if (sv[i].vals == NULL) {
 2437         free_spoints(sv, n);
 2438         sv = NULL;
 2439         break;
 2440         }
 2441     }
 2442     }
 2443 
 2444     return sv;
 2445 }
 2446 
 2447 static int compare_vals_up (const void *a, const void *b)
 2448 {
 2449     const spoint_t *pa = (const spoint_t *) a;
 2450     const spoint_t *pb = (const spoint_t *) b;
 2451     int i, ret = 0;
 2452 
 2453     for (i=0; i<pa->nvals && !ret; i++) {
 2454     if (isnan(pa->vals[i]) || isnan(pb->vals[i])) {
 2455         if (!isnan(pa->vals[i])) {
 2456         ret = -1;
 2457         } else if (!isnan(pb->vals[i])) {
 2458         ret = 1;
 2459         }
 2460     } else {
 2461         ret = (pa->vals[i] > pb->vals[i]) - (pa->vals[i] < pb->vals[i]);
 2462     }
 2463     }
 2464 
 2465     return ret;
 2466 }
 2467 
 2468 static int compare_vals_down (const void *a, const void *b)
 2469 {
 2470     const spoint_t *pa = (const spoint_t *) a;
 2471     const spoint_t *pb = (const spoint_t *) b;
 2472     int i, ret = 0;
 2473 
 2474     for (i=0; i<pa->nvals && !ret; i++) {
 2475     if (isnan(pa->vals[i]) || isnan(pb->vals[i])) {
 2476         if (!isnan(pa->vals[i])) {
 2477         ret = 1;
 2478         } else if (!isnan(pb->vals[i])) {
 2479         ret = -1;
 2480         }
 2481     } else {
 2482         ret = (pa->vals[i] < pb->vals[i]) - (pa->vals[i] > pb->vals[i]);
 2483     }
 2484     }
 2485 
 2486     return ret;
 2487 }
 2488 
 2489 /* Turn a string-valued series into an integer-valued series
 2490    representing the places of the strings in lexical order.
 2491 */
 2492 
 2493 typedef struct lexval_ {
 2494     const char *s;
 2495     int code;
 2496 } lexval;
 2497 
 2498 static int compare_lexvals (const void *a, const void *b)
 2499 {
 2500     const lexval *lva = (const lexval *) a;
 2501     const lexval *lvb = (const lexval *) b;
 2502 
 2503     return g_utf8_collate(lva->s, lvb->s);
 2504 }
 2505 
 2506 static int series_to_lexvals (DATASET *dset, int v, int *targ)
 2507 {
 2508     int i, t, ct, n_strs;
 2509     series_table *st = series_get_string_table(dset, v);
 2510     char **strs = series_table_get_strings(st, &n_strs);
 2511     lexval *lexvals;
 2512 
 2513     lexvals = calloc(n_strs, sizeof *lexvals);
 2514     if (lexvals == NULL) {
 2515     return E_ALLOC;
 2516     }
 2517 
 2518     for (i=0; i<n_strs; i++) {
 2519     lexvals[i].s = strs[i];
 2520     lexvals[i].code = i+1;
 2521     }
 2522 
 2523     qsort(lexvals, n_strs, sizeof *lexvals, compare_lexvals);
 2524 
 2525     for (t=0; t<dset->n; t++) {
 2526     if (na(dset->Z[v][t])) {
 2527         targ[t] = INT_MAX;
 2528     } else {
 2529         ct = (int) dset->Z[v][t];
 2530         targ[t] = 0;
 2531         for (i=0; i<n_strs; i++) {
 2532         if (ct == lexvals[i].code) {
 2533             targ[t] = i+1;
 2534             break;
 2535         }
 2536         }
 2537     }
 2538     }
 2539 
 2540     free(lexvals);
 2541 
 2542     return 0;
 2543 }
 2544 
 2545 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt)
 2546 {
 2547     spoint_t *sv = NULL;
 2548     double *x = NULL;
 2549     int *xs = NULL;
 2550     int *xsi = NULL;
 2551     char **S = NULL;
 2552     int ns = list[0];
 2553     int nsvals = 0;
 2554     int i, t, v;
 2555     int err = 0;
 2556 
 2557     sv = allocate_spoints(dset->n, ns);
 2558     if (sv == NULL) {
 2559     return E_ALLOC;
 2560     }
 2561 
 2562     x = malloc(dset->n * sizeof *x);
 2563     if (x == NULL) {
 2564     free_spoints(sv, dset->n);
 2565     return E_ALLOC;
 2566     }
 2567 
 2568     if (dset->S != NULL) {
 2569     S = strings_array_new_with_length(dset->n, OBSLEN);
 2570     if (S == NULL) {
 2571         err = E_ALLOC;
 2572         goto bailout;
 2573     }
 2574     }
 2575 
 2576     for (i=0; i<ns; i++) {
 2577     if (is_string_valued(dset, list[i+1])) {
 2578         nsvals++;
 2579     }
 2580     }
 2581     if (nsvals > 0) {
 2582     xs = malloc(nsvals * dset->n * sizeof *xs);
 2583     if (xs == NULL) {
 2584         err = E_ALLOC;
 2585     } else {
 2586         xsi = xs;
 2587         for (i=0; i<ns && !err; i++) {
 2588         v = list[i+1];
 2589         if (is_string_valued(dset, v)) {
 2590             err = series_to_lexvals(dset, v, xsi);
 2591             xsi += dset->n;
 2592         }
 2593         }
 2594     }
 2595     if (err) {
 2596         goto bailout;
 2597     }
 2598     }
 2599 
 2600     for (t=0; t<dset->n; t++) {
 2601     sv[t].obsnum = t;
 2602     sv[t].nvals = ns;
 2603     }
 2604     xsi = xs;
 2605     for (i=0; i<ns; i++) {
 2606     v = list[i+1];
 2607     if (is_string_valued(dset, v)) {
 2608         for (t=0; t<dset->n; t++) {
 2609         if (xsi[t] == INT_MAX) {
 2610             sv[t].vals[i] = NADBL;
 2611         } else {
 2612             sv[t].vals[i] = (double) xsi[t];
 2613         }
 2614         }
 2615         xsi += dset->n;
 2616     } else {
 2617         for (t=0; t<dset->n; t++) {
 2618         sv[t].vals[i] = dset->Z[v][t];
 2619         }
 2620     }
 2621     }
 2622 
 2623     if (opt & OPT_D) {
 2624     /* descending */
 2625     qsort(sv, dset->n, sizeof *sv, compare_vals_down);
 2626     } else {
 2627     qsort(sv, dset->n, sizeof *sv, compare_vals_up);
 2628     }
 2629 
 2630     for (i=1; i<dset->v; i++) {
 2631     for (t=0; t<dset->n; t++) {
 2632         x[t] = dset->Z[i][sv[t].obsnum];
 2633     }
 2634     for (t=0; t<dset->n; t++) {
 2635         dset->Z[i][t] = x[t];
 2636     }
 2637     }
 2638 
 2639     if (S != NULL) {
 2640     for (t=0; t<dset->n; t++) {
 2641         strcpy(S[t], dset->S[sv[t].obsnum]);
 2642     }
 2643     strings_array_free(dset->S, dset->n);
 2644     dset->S = S;
 2645     }
 2646 
 2647  bailout:
 2648 
 2649     free_spoints(sv, dset->n);
 2650     free(x);
 2651     free(xs);
 2652 
 2653     return err;
 2654 }
 2655 
 2656 static int dataset_sort (DATASET *dset, const int *list,
 2657              gretlopt opt)
 2658 {
 2659     if (dataset_is_time_series(dset) ||
 2660     dataset_is_panel(dset)) {
 2661     gretl_errmsg_set("You can only do this with undated data");
 2662     return E_DATA;
 2663     }
 2664 
 2665     if (list == NULL || list[0] < 1) {
 2666     return E_DATA;
 2667     }
 2668 
 2669     return dataset_sort_by(dset, list, opt);
 2670 }
 2671 
 2672 /**
 2673  * dataset_drop_last_variables:
 2674  * @dset: pointer to dataset.
 2675  * @delvars: number of variables to be dropped.
 2676  *
 2677  * Deletes from the dataset the number @delvars of variables
 2678  * that were added most recently (that have the highest ID numbers).
 2679  *
 2680  * Returns: 0 on success, E_ALLOC on error.
 2681  */
 2682 
 2683 int dataset_drop_last_variables (DATASET *dset, int delvars)
 2684 {
 2685     int newv = dset->v - delvars;
 2686     int i, err = 0;
 2687 
 2688     if (delvars <= 0) {
 2689     return 0;
 2690     }
 2691 
 2692 #if FULLDEBUG
 2693     fprintf(stderr, "*** dataset_drop_last_variables: dropping %d, newv = %d\n",
 2694         delvars, newv);
 2695 #endif
 2696 
 2697     if (newv < 1) {
 2698     fprintf(stderr, "dataset_drop_last_vars: dset->v = %d, delvars = %d "
 2699         " -> newv = %d\n (dset = %p)\n", dset->v, delvars,
 2700         newv, (void *) dset);
 2701     return E_DATA;
 2702     }
 2703 
 2704 #if FULLDEBUG
 2705     for (i=0; i<dset->v; i++) {
 2706     if (dset->Z[i] == NULL) {
 2707         fprintf(stderr, "var %d (%s, level %d, val = NULL) %s\n",
 2708             i, dset->varname[i], dset->varinfo[i]->stack_level,
 2709             (i >= newv)? "deleting" : "");
 2710     } else {
 2711         fprintf(stderr, "var %d (%s, level %d, val[0] = %g) %s\n",
 2712             i, dset->varname[i], dset->varinfo[i]->stack_level,
 2713             dset->Z[i][0], (i >= newv)? "deleting" : "");
 2714     }
 2715     }
 2716 #endif
 2717 
 2718 #if 0
 2719     fprintf(stderr, "dataset_drop_last_variables: origv=%d, newv=%d\n",
 2720         dset->v, newv);
 2721     for (i=1; i<dset->v; i++) {
 2722     fprintf(stderr, "before: var[%d] = '%s'\n", i, dset->varname[i]);
 2723     }
 2724 #endif
 2725 
 2726     for (i=newv; i<dset->v; i++) {
 2727     free(dset->varname[i]);
 2728     free_varinfo(dset, i);
 2729     free(dset->Z[i]);
 2730     dset->Z[i] = NULL;
 2731     }
 2732 
 2733     err = shrink_dataset_to_size(dset, newv, DROP_NORMAL);
 2734 
 2735 #if 0
 2736     for (i=1; i<dset->v; i++) {
 2737     fprintf(stderr, "after: var[%d] = '%s'\n", i, dset->varname[i]);
 2738     }
 2739 #endif
 2740 
 2741     if (!err && !dset->auxiliary) {
 2742     err = gretl_lists_revise(NULL, newv);
 2743     }
 2744 
 2745     if (!err && complex_subsampled()) {
 2746     DATASET *fset = fetch_full_dataset();
 2747 
 2748     /*
 2749        Context: we're deleting @delvars variables at the end of
 2750        dset->Z, leaving @newv variables.  The dataset is currently
 2751        subsampled.
 2752 
 2753        Question: should we delete any variables at the end of
 2754        fset->Z to keep the two arrays in sync?
 2755 
 2756        If @newv < fset->v, this must mean that at least some of
 2757        the extra vars we're deleting from the current sub-sampled
 2758        Z have already been synced to the full Z, so we should do
 2759        the deletion from full Z.
 2760     */
 2761 
 2762     if (newv < fset->v) {
 2763 #if FULLDEBUG
 2764         fprintf(stderr, "prior fset->v = %d: shrinking full Z to %d vars\n",
 2765             fset->v, newv);
 2766 #endif
 2767         for (i=newv; i<fset->v; i++) {
 2768         free(fset->Z[i]);
 2769         fset->Z[i] = NULL;
 2770         }
 2771         err = shrink_dataset_to_size(fset, newv, DROP_SPECIAL);
 2772     }
 2773     }
 2774 
 2775     return err;
 2776 }
 2777 
 2778 /**
 2779  * build_stacked_series:
 2780  * @pstack: location for returning stacked series.
 2781  * @list: list of series to be stacked.
 2782  * @length: number of observations to use per input series.
 2783  * @offset: offset at which to start drawing observations.
 2784  * @dset: pointer to dataset.
 2785  *
 2786  * Really for internal use. Don't worry about it.
 2787  *
 2788  * Returns: 0 on success, non-zero code on error.
 2789  */
 2790 
 2791 int build_stacked_series (double **pstack, int *list,
 2792               int length, int offset,
 2793               DATASET *dset)
 2794 {
 2795     double *xstack = NULL;
 2796     int nv, oldn, bign, tmax;
 2797     int i, err = 0;
 2798 
 2799     if (dset == NULL || dset->n == 0) {
 2800     return E_NODATA;
 2801     } else if (dataset_is_subsampled(dset)) {
 2802     gretl_errmsg_set("stack: this function cannot be used when the dataset "
 2803              "is sub-sampled");
 2804     return E_DATA;
 2805     } else if (list == NULL || list[0] <= 0) {
 2806     return E_INVARG;
 2807     } else if (length <= 0) {
 2808     return E_INVARG;
 2809     } else if (length + offset > dset->n) {
 2810     return E_INVARG;
 2811     }
 2812 
 2813     nv = list[0];
 2814 
 2815 #if PDEBUG
 2816     fprintf(stderr, "nv = %d, length = %d, offset = %d\n", nv, length, offset);
 2817 #endif
 2818 
 2819     bign = nv * length;
 2820     if (bign < dset->n) {
 2821     bign = dset->n;
 2822     }
 2823 
 2824 #if PDEBUG
 2825     fprintf(stderr, "bign = %d, allocating xstack (oldn = %d)\n", bign, dset->n);
 2826 #endif
 2827 
 2828     /* allocate container for stacked data */
 2829     xstack = malloc(bign * sizeof *xstack);
 2830     if (xstack == NULL) {
 2831     return E_ALLOC;
 2832     }
 2833 
 2834     /* extend length of all series? */
 2835     oldn = dset->n;
 2836     if (bign > oldn) {
 2837     err = dataset_add_observations(dset, bign - oldn, OPT_NONE);
 2838     if (err) {
 2839         return err;
 2840     }
 2841     }
 2842 
 2843     tmax = offset + length;
 2844 
 2845     /* construct stacked series */
 2846     for (i=0; i<nv; i++) {
 2847     int j = list[i+1];
 2848     int bigt = length * i;
 2849     int t;
 2850 
 2851     for (t=offset; t<tmax; t++) {
 2852         xstack[bigt] = dset->Z[j][t];
 2853         if (dset->S != NULL && bigt != t) {
 2854         strcpy(dset->S[bigt], dset->S[t]);
 2855         }
 2856         bigt++;
 2857     }
 2858     if (i == nv - 1) {
 2859         for (t=bigt; t<bign; t++) {
 2860         xstack[bigt++] = NADBL;
 2861         }
 2862     }
 2863     }
 2864 
 2865     *pstack = xstack;
 2866 
 2867     return err;
 2868 }
 2869 
 2870 static int found_log_parent (const char *s, char *targ)
 2871 {
 2872     int len = gretl_namechar_spn(s);
 2873 
 2874     if (len < VNAMELEN && s[len] == ')') {
 2875     char fmt[8];
 2876 
 2877     sprintf(fmt, "%%%d[^)]", VNAMELEN-1);
 2878     sscanf(s, fmt, targ);
 2879     return 1;
 2880     }
 2881 
 2882     return 0;
 2883 }
 2884 
 2885 /**
 2886  * series_is_log:
 2887  * @dset: dataset information.
 2888  * @i: ID number of series.
 2889  * @parent: location to which to write the name of the
 2890  * "parent" variable if any.
 2891  *
 2892  * Tries to determine if the variable with ID number @i is
 2893  * the logarithm of some other variable.
 2894  *
 2895  * Returns: 1 if variable @i appears to be a log, else 0.
 2896  */
 2897 
 2898 int series_is_log (const DATASET *dset, int i, char *parent)
 2899 {
 2900     const char *s = series_get_label(dset, i);
 2901 
 2902     *parent = '\0';
 2903 
 2904     if (s != NULL && *s != '\0') {
 2905     char fmt[16];
 2906 
 2907     sprintf(fmt, "= log of %%%ds", VNAMELEN-1);
 2908 
 2909     if (sscanf(s, fmt, parent) == 1) {
 2910         return 1;
 2911     } else if (!strncmp(s, "log(", 4)) {
 2912         return found_log_parent(s + 4, parent);
 2913     } else {
 2914         s += strcspn(s, "=");
 2915         if (!strncmp(s, "=log(", 5)) {
 2916         return found_log_parent(s + 5, parent);
 2917         }
 2918     }
 2919     }
 2920 
 2921     return 0;
 2922 }
 2923 
 2924 /**
 2925  * series_set_discrete:
 2926  * @dset: pointer to data information struct.
 2927  * @i: index number of series.
 2928  * @s: non-zero to mark variable as discrete, zero to
 2929  * mark as not discrete.
 2930  *
 2931  * Mark a variable as being discrete or not.
 2932  */
 2933 
 2934 void series_set_discrete (DATASET *dset, int i, int s)
 2935 {
 2936     if (i > 0 && i < dset->v) {
 2937     int flags = dset->varinfo[i]->flags;
 2938 
 2939     if (s && !(flags & VAR_DISCRETE)) {
 2940         dset->varinfo[i]->flags |= VAR_DISCRETE;
 2941         set_dataset_is_changed(dset, 1);
 2942     } else if (!s && (flags & VAR_DISCRETE)) {
 2943         dset->varinfo[i]->flags &= ~VAR_DISCRETE;
 2944         set_dataset_is_changed(dset, 1);
 2945     }
 2946     }
 2947 }
 2948 
 2949 int series_record_label (DATASET *dset, int i,
 2950              const char *s)
 2951 {
 2952     char *targ = dset->varinfo[i]->label;
 2953 
 2954     if (labels_differ(targ, s)) {
 2955     copy_label(&dset->varinfo[i]->label, s);
 2956     set_dataset_is_changed(dset, 1);
 2957     }
 2958 
 2959     return 0;
 2960 }
 2961 
 2962 int series_record_display_name (DATASET *dset, int i,
 2963                 const char *s)
 2964 {
 2965     char *targ = dset->varinfo[i]->display_name;
 2966 
 2967     if (strcmp(targ, s)) {
 2968     *targ = '\0';
 2969     strncat(targ, s, MAXDISP - 1);
 2970     set_dataset_is_changed(dset, 1);
 2971     }
 2972 
 2973     return 0;
 2974 }
 2975 
 2976 const char *series_get_graph_name (const DATASET *dset, int i)
 2977 {
 2978     const char *ret = dset->varname[i];
 2979 
 2980     if (dset->varinfo != NULL && dset->varinfo[i] != NULL) {
 2981     if (dset->varinfo[i]->display_name[0] != '\0') {
 2982         ret = dset->varinfo[i]->display_name;
 2983     }
 2984     }
 2985 
 2986     return ret;
 2987 }
 2988 
 2989 static int add_obs (int n, DATASET *dset, gretlopt opt, PRN *prn)
 2990 {
 2991     int err = 0;
 2992 
 2993     if (complex_subsampled()) {
 2994     pprintf(prn, _("The data set is currently sub-sampled.\n"));
 2995     err = E_DATA;
 2996     } else if (n <= 0) {
 2997     err = E_PARSE;
 2998     } else if (opt & OPT_T) {
 2999     /* extending panel time */
 3000     err = panel_dataset_extend_time(dset, n);
 3001     if (!err) {
 3002         pprintf(prn, _("Panel time extended by %d observations"), n);
 3003         pputc(prn, '\n');
 3004     }
 3005     } else {
 3006     err = dataset_add_observations(dset, n, OPT_A);
 3007     if (!err) {
 3008         pprintf(prn, _("Dataset extended by %d observations"), n);
 3009         pputc(prn, '\n');
 3010         extend_function_sample_range(n);
 3011     }
 3012     }
 3013 
 3014     return err;
 3015 }
 3016 
 3017 static int insert_obs (int n, DATASET *dset, PRN *prn)
 3018 {
 3019     int err = 0;
 3020 
 3021     if (complex_subsampled()) {
 3022     pprintf(prn, _("The data set is currently sub-sampled.\n"));
 3023     err = E_DATA;
 3024     } else if (dataset_is_panel(dset)) {
 3025     err = E_PDWRONG;
 3026     } else if (n <= 0 || n > dset->n) {
 3027     err = E_DATA;
 3028     } else {
 3029     err = real_insert_observation(n - 1, dset);
 3030     }
 3031 
 3032     return err;
 3033 }
 3034 
 3035 int dataset_op_from_string (const char *s)
 3036 {
 3037     int op = DS_NONE;
 3038 
 3039     if (s == NULL || *s == '\0') {
 3040     return DS_NONE;
 3041     }
 3042 
 3043     if (!strcmp(s, "addobs")) {
 3044     op = DS_ADDOBS;
 3045     } else if (!strcmp(s, "compact")) {
 3046     op = DS_COMPACT;
 3047     } else if (!strcmp(s, "expand")) {
 3048     op = DS_EXPAND;
 3049     } else if (!strcmp(s, "transpose")) {
 3050     op = DS_TRANSPOSE;
 3051     } else if (!strcmp(s, "delete")) {
 3052     op = DS_DELETE;
 3053     } else if (!strcmp(s, "keep")) {
 3054     op = DS_KEEP;
 3055     } else if (!strcmp(s, "sortby")) {
 3056     op = DS_SORTBY;
 3057     } else if (!strcmp(s, "dsortby")) {
 3058     op = DS_DSORTBY;
 3059     } else if (!strcmp(s, "resample")) {
 3060     op = DS_RESAMPLE;
 3061     } else if (!strcmp(s, "restore")) {
 3062     op = DS_RESTORE;
 3063     } else if (!strcmp(s, "clear")) {
 3064     op = DS_CLEAR;
 3065     } else if (!strcmp(s, "renumber")) {
 3066     op = DS_RENUMBER;
 3067     } else if (!strcmp(s, "insobs")) {
 3068     op = DS_INSOBS;
 3069     } else if (!strcmp(s, "pad-daily")) {
 3070     op = DS_PAD_DAILY;
 3071     }
 3072 
 3073     return op;
 3074 }
 3075 
 3076 static int dataset_int_param (const char **ps, int op,
 3077                   DATASET *dset, int *err)
 3078 {
 3079     const char *s = *ps;
 3080     char test[32];
 3081     int k = 0;
 3082 
 3083     if ((op == DS_COMPACT || op == DS_EXPAND) &&
 3084     !dataset_is_time_series(dset)) {
 3085     *err = E_PDWRONG;
 3086     return 0;
 3087     }
 3088 
 3089     if (op == DS_PAD_DAILY && !dated_daily_data(dset)) {
 3090     *err = E_PDWRONG;
 3091     return 0;
 3092     }
 3093 
 3094     *test = '\0';
 3095     sscanf(s, "%31s", test);
 3096     *ps += strlen(test);
 3097 
 3098     k = gretl_int_from_string(test, err);
 3099     if (*err) {
 3100     return 0;
 3101     }
 3102 
 3103     if (k <= 0 || (op == DS_RESAMPLE && k < 1)) {
 3104     *err = E_DATA;
 3105     } else if (op == DS_INSOBS) {
 3106     if (k > dset->n) {
 3107         *err = E_DATA;
 3108     }
 3109     } else if (op == DS_COMPACT) {
 3110     *err = E_PDWRONG;
 3111     if (dset->pd == 12 && (k == 4 || k == 1)) {
 3112         *err = 0;
 3113     } else if (dset->pd == 4 && k == 1) {
 3114         *err = 0;
 3115     } else if (dset->pd == 52 && k == 12) {
 3116         *err = 0;
 3117     } else if (dated_daily_data(dset) && (k == 52 || k == 12)) {
 3118         *err = 0;
 3119     } else if (dataset_is_daily(dset) && k == 4) {
 3120         if (strstr(*ps, "spread")) {
 3121         *err = 0;
 3122         }
 3123     }
 3124     } else if (op == DS_EXPAND) {
 3125     *err = E_PDWRONG;
 3126     if (dset->pd == 1 && (k == 4 || k == 12)) {
 3127         *err = 0;
 3128     } else if (dset->pd == 4 && k == 12) {
 3129         *err = 0;
 3130     }
 3131     } else if (op == DS_PAD_DAILY) {
 3132     if (k < 5 || k > 7 || k < dset->pd) {
 3133         *err = E_PDWRONG;
 3134     }
 3135     }
 3136 
 3137     if (*err == E_PDWRONG) {
 3138     gretl_errmsg_set("This conversion is not supported");
 3139     }
 3140 
 3141     return k;
 3142 }
 3143 
 3144 static int compact_data_set_wrapper (const char *s, DATASET *dset,
 3145                      int k)
 3146 {
 3147     CompactMethod method = COMPACT_AVG;
 3148 
 3149     if (s != NULL) {
 3150     s += strspn(s, " ");
 3151     if (!strcmp(s, "sum")) {
 3152         method = COMPACT_SUM;
 3153     } else if (!strcmp(s, "first") || !strcmp(s, "sop")) {
 3154         method = COMPACT_SOP;
 3155     } else if (!strcmp(s, "last") || !strcmp(s, "eop")) {
 3156         method = COMPACT_EOP;
 3157     } else if (!strcmp(s, "spread")) {
 3158         method = COMPACT_SPREAD;
 3159     } else if (!strcmp(s, "avg") || !strcmp(s, "average")) {
 3160         method = COMPACT_AVG;
 3161     } else if (*s != '\0') {
 3162         return E_PARSE;
 3163     }
 3164     }
 3165 
 3166     return compact_data_set(dset, k, method, 0, 0);
 3167 }
 3168 
 3169 static unsigned int resample_seed;
 3170 
 3171 unsigned int get_resampling_seed (void)
 3172 {
 3173     return resample_seed;
 3174 }
 3175 
 3176 /* resample the dataset by observation, with replacement */
 3177 
 3178 int dataset_resample (DATASET *dset, int n, unsigned int seed)
 3179 {
 3180     DATASET *rset = NULL;
 3181     char **S = NULL;
 3182     unsigned int state;
 3183     int T = sample_size(dset);
 3184     int v = dset->v;
 3185     int i, j, s, t;
 3186     int err = 0;
 3187 
 3188     if (v < 2) {
 3189     return E_DATA;
 3190     }
 3191 
 3192     rset = datainfo_new();
 3193     if (rset == NULL) {
 3194     return E_ALLOC;
 3195     }
 3196 
 3197     rset->Z = malloc(v * sizeof *rset->Z);
 3198     if (rset->Z == NULL) {
 3199     free(rset);
 3200     return E_ALLOC;
 3201     }
 3202 
 3203     for (i=0; i<v; i++) {
 3204     rset->Z[i] = NULL;
 3205     }
 3206 
 3207     rset->v = v;
 3208 
 3209     j = 0;
 3210     for (i=0; i<dset->v && !err; i++) {
 3211     rset->Z[j] = malloc(n * sizeof **rset->Z);
 3212     if (rset->Z[j] == NULL) {
 3213         err = E_ALLOC;
 3214     } else if (i == 0) {
 3215         for (t=0; t<n; t++) {
 3216         rset->Z[j][t] = 1.0;
 3217         }
 3218     }
 3219     j++;
 3220     }
 3221 
 3222     if (err) {
 3223     goto bailout;
 3224     }
 3225 
 3226     if (dset->markers == REGULAR_MARKERS) {
 3227     S = strings_array_new_with_length(n, OBSLEN);
 3228     }
 3229 
 3230     if (seed > 0) {
 3231     resample_seed = seed;
 3232     gretl_rand_set_seed(seed);
 3233     } else {
 3234     resample_seed = gretl_rand_get_seed();
 3235     }
 3236 
 3237     state = gretl_rand_int();
 3238 
 3239     for (t=0; t<n; t++) {
 3240     s = gretl_rand_int_max(T) + dset->t1;
 3241     j = 1;
 3242     for (i=1; i<dset->v; i++) {
 3243         rset->Z[j][t] = dset->Z[i][s];
 3244         j++;
 3245     }
 3246     if (S != NULL) {
 3247         strcpy(S[t], dset->S[s]);
 3248     }
 3249     }
 3250 
 3251     if (S != NULL) {
 3252     rset->S = S;
 3253     rset->markers = REGULAR_MARKERS;
 3254     }
 3255 
 3256     rset->varname = dset->varname;
 3257     rset->varinfo = dset->varinfo;
 3258     rset->descrip = dset->descrip;
 3259 
 3260     rset->n = n;
 3261     rset->t1 = 0;
 3262     rset->t2 = n - 1;
 3263     dataset_obs_info_default(rset);
 3264 
 3265     set_dataset_resampled(rset, state);
 3266 
 3267  bailout:
 3268 
 3269     if (err) {
 3270     free_Z(rset);
 3271     clear_datainfo(rset, CLEAR_SUBSAMPLE);
 3272     free(rset);
 3273     } else {
 3274     backup_full_dataset(dset);
 3275     *dset = *rset;
 3276     free(rset);
 3277     }
 3278 
 3279     return err;
 3280 }
 3281 
 3282 /* note: @list should contain a single series ID, that of the
 3283    target series, and @param should hold a numeric string
 3284    giving the position to which @targ should be moved;
 3285    @fixmax is the greatest series ID number that cannot be
 3286    changed (based on saved models, etc., as determined by the
 3287    caller)
 3288 */
 3289 
 3290 int renumber_series_with_checks (const int *list,
 3291                  const char *param,
 3292                  int fixmax,
 3293                  DATASET *dset,
 3294                  PRN *prn)
 3295 {
 3296     char vname[VNAMELEN];
 3297     int v_old, v_new;
 3298     int f1, err = 0;
 3299 
 3300     if (list == NULL || list[0] != 1 ||
 3301     param == NULL || *param == '\0') {
 3302     return E_INVARG;
 3303     }
 3304 
 3305     if (sscanf(param, "%d", &v_new) != 1) {
 3306     return E_INVARG;
 3307     }
 3308 
 3309     v_old = list[1];
 3310 
 3311     if (v_old < 1 || v_old > dset->v - 1 ||
 3312     v_new < 1 || v_new > dset->v - 1) {
 3313     /* out of bounds */
 3314     return E_INVARG;
 3315     } else if (v_new == v_old) {
 3316     /* no-op */
 3317     return 0;
 3318     }
 3319 
 3320     f1 = max_varno_in_saved_lists();
 3321 
 3322     if (f1 > fixmax) {
 3323     fixmax = f1;
 3324     }
 3325 
 3326     strcpy(vname, dset->varname[v_old]);
 3327 
 3328     if (v_old <= fixmax) {
 3329     gretl_errmsg_sprintf(_("Variable %s cannot be renumbered"), vname);
 3330     err = E_DATA;
 3331     } else if (v_new <= fixmax) {
 3332     gretl_errmsg_sprintf(_("Target ID %d is not available"), v_new);
 3333     err = E_DATA;
 3334     } else {
 3335     err = dataset_renumber_variable(v_old, v_new, dset);
 3336     }
 3337 
 3338     if (!err && gretl_messages_on()) {
 3339     pprintf(prn, _("Renumbered %s as variable %d\n"),
 3340         vname, v_new);
 3341     maybe_list_series(dset, prn);
 3342     }
 3343 
 3344     return err;
 3345 }
 3346 
 3347 /* alternate forms:
 3348 
 3349            @op        @list  @param
 3350    dataset addobs            24
 3351    dataset compact           1
 3352    dataset compact           4 last
 3353    dataset expand            4
 3354    dataset transpose
 3355    dataset sortby     x1
 3356    dataset resample          500
 3357    dataset clear
 3358    dataset renumber   orig   2
 3359    dataset insobs            13
 3360    dataset pad-daily         7
 3361 
 3362 */
 3363 
 3364 int modify_dataset (DATASET *dset, int op, const int *list,
 3365             const char *param, gretlopt opt, PRN *prn)
 3366 {
 3367     static int resampled;
 3368     int k = 0, err = 0;
 3369 
 3370     if (dset == NULL || dset->Z == NULL) {
 3371     return E_NODATA;
 3372     }
 3373 
 3374 #if 0
 3375     fprintf(stderr, "modify_dataset: op=%d, param='%s'\n", op, param);
 3376     printlist(list, "list");
 3377 #endif
 3378 
 3379     if (op == DS_CLEAR || op == DS_RENUMBER) {
 3380     /* must be handled by the calling program */
 3381     return E_NOTIMP;
 3382     }
 3383 
 3384     if (gretl_function_depth() > 0) {
 3385     if (op == DS_ADDOBS && !complex_subsampled() &&
 3386         dset->t2 == dset->n - 1 && !(opt & OPT_T)) {
 3387         /* experimental, 2015-07-28: allow "addobs" within a
 3388            function provided the dataset is not subsampled
 3389         */
 3390         goto proceed;
 3391     } else {
 3392         gretl_errmsg_set(_("The 'dataset' command is not available "
 3393                    "within functions"));
 3394         return 1;
 3395     }
 3396     }
 3397 
 3398     if (gretl_looping() && op != DS_RESAMPLE &&
 3399     op != DS_RESTORE && op != DS_SORTBY) {
 3400     pputs(prn, _("Sorry, this command is not available in loop mode\n"));
 3401     return 1;
 3402     }
 3403 
 3404     if (op == DS_RESAMPLE && resampled) {
 3405     /* repeated "resample": implicitly restore first */
 3406     err = restore_full_sample(dset, NULL);
 3407     if (err) {
 3408         return err;
 3409     } else {
 3410         resampled = 0;
 3411     }
 3412     }
 3413 
 3414     if (op != DS_RESTORE && complex_subsampled()) {
 3415     gretl_errmsg_set(_("The data set is currently sub-sampled"));
 3416     return 1;
 3417     }
 3418 
 3419  proceed:
 3420 
 3421     if (op == DS_ADDOBS || op == DS_INSOBS ||
 3422     op == DS_COMPACT || op == DS_RESAMPLE ||
 3423     op == DS_PAD_DAILY) {
 3424     if (param == NULL) {
 3425         err = E_ARGS;
 3426     } else {
 3427         k = dataset_int_param(&param, op, dset, &err);
 3428     }
 3429     if (err) {
 3430         return err;
 3431     }
 3432     } else if (op == DS_EXPAND) {
 3433     if (param != NULL) {
 3434         k = dataset_int_param(&param, op, dset, &err);
 3435     } else if (dset->pd == 1) {
 3436         k = 4;
 3437     } else if (dset->pd == 4) {
 3438         k = 12;
 3439     } else {
 3440         err = E_PDWRONG;
 3441     }
 3442     if (err) {
 3443         return err;
 3444     }
 3445     }
 3446 
 3447     if (op == DS_ADDOBS) {
 3448     err = add_obs(k, dset, opt, prn);
 3449     } else if (op == DS_INSOBS) {
 3450     err = insert_obs(k, dset, prn);
 3451     } else if (op == DS_COMPACT) {
 3452     err = compact_data_set_wrapper(param, dset, k);
 3453     } else if (op == DS_EXPAND) {
 3454     err = expand_data_set(dset, k);
 3455     } else if (op == DS_PAD_DAILY) {
 3456     err = pad_daily_data(dset, k, prn);
 3457     } else if (op == DS_TRANSPOSE) {
 3458     err = transpose_data(dset);
 3459     } else if (op == DS_SORTBY) {
 3460     err = dataset_sort(dset, list, OPT_NONE);
 3461     } else if (op == DS_DSORTBY) {
 3462     err = dataset_sort(dset, list, OPT_D);
 3463     } else if (op == DS_RESAMPLE) {
 3464     err = dataset_resample(dset, k, 0);
 3465     if (!err) {
 3466         resampled = 1;
 3467     }
 3468     } else if (op == DS_RESTORE) {
 3469     if (resampled) {
 3470         err = restore_full_sample(dset, NULL);
 3471         resampled = 0;
 3472     } else {
 3473         pprintf(prn, _("dataset restore: dataset is not resampled\n"));
 3474         err = E_DATA;
 3475     }
 3476     } else if (op == DS_DELETE) {
 3477     pprintf(prn, "dataset delete: not ready yet\n");
 3478     } else if (op == DS_KEEP) {
 3479     pprintf(prn, "dataset keep: not ready yet\n");
 3480     } else {
 3481     err = E_PARSE;
 3482     }
 3483 
 3484     return err;
 3485 }
 3486 
 3487 int dataset_get_structure (const DATASET *dset)
 3488 {
 3489     if (dset == NULL || dset->n == 0) {
 3490     return DATA_NONE;
 3491     } else if (dataset_is_panel(dset)) {
 3492     return DATA_PANEL;
 3493     } else if (dataset_is_time_series(dset)) {
 3494     return DATA_TS;
 3495     } else {
 3496     return DATA_XSECT;
 3497     }
 3498 }
 3499 
 3500 /**
 3501  * panel_sample_size:
 3502  * @dset: pointer to data information struct.
 3503  *
 3504  * Returns: the numbers of units/individuals in the current
 3505  * sample range, or 0 if the dataset is not a panel.
 3506  */
 3507 
 3508 int panel_sample_size (const DATASET *dset)
 3509 {
 3510     int ret = 0;
 3511 
 3512     if (dataset_is_panel(dset)) {
 3513     ret = (dset->t2 - dset->t1 + 1) / dset->pd;
 3514     }
 3515 
 3516     return ret;
 3517 }
 3518 
 3519 /**
 3520  * multi_unit_panel_sample:
 3521  * @dset: pointer to dataset.
 3522  *
 3523  * Returns: 1 if the dataset is a panel and the current sample
 3524  * range includes two or more individuals, otherwise 0.
 3525  */
 3526 
 3527 int multi_unit_panel_sample (const DATASET *dset)
 3528 {
 3529     int ret = 0;
 3530 
 3531     if (dataset_is_panel(dset)) {
 3532     ret = (dset->t2 - dset->t1 + 1) > dset->pd;
 3533     }
 3534 
 3535     return ret;
 3536 }
 3537 
 3538 /**
 3539  * dataset_purge_missing_rows:
 3540  * @dset: pointer to dataset.
 3541  *
 3542  * Removes empty rows from the dataset -- that is, observations
 3543  * at which there are no non-missing values.  This is intended
 3544  * for daily data only.
 3545  *
 3546  * Returns: 0 on success, non-zero code on error.
 3547  */
 3548 
 3549 int dataset_purge_missing_rows (DATASET *dset)
 3550 {
 3551     int new_n, missrow, totmiss = 0;
 3552     int t1 = dset->t1;
 3553     int t2 = dset->t2;
 3554     char **S = NULL;
 3555     double *Zi = NULL;
 3556     size_t sz;
 3557     int i, t, s;
 3558     int err = 0;
 3559 
 3560     for (t=0; t<dset->n; t++) {
 3561     missrow = 1;
 3562     for (i=1; i<dset->v; i++) {
 3563         if (!na(dset->Z[i][t])) {
 3564         missrow = 0;
 3565         break;
 3566         }
 3567     }
 3568     if (missrow) {
 3569         totmiss++;
 3570         if (t < dset->t1) {
 3571         t1--;
 3572         }
 3573         if (t < dset->t2) {
 3574         t2--;
 3575         }
 3576     }
 3577     }
 3578 
 3579     if (totmiss == 0) {
 3580     /* no-op */
 3581     return 0;
 3582     }
 3583 
 3584     if (dated_daily_data(dset) && dset->S == NULL) {
 3585     err = dataset_allocate_obs_markers(dset);
 3586     if (!err) {
 3587         for (t=0; t<dset->n; t++) {
 3588         calendar_date_string(dset->S[t], t, dset);
 3589         }
 3590     }
 3591     }
 3592 
 3593     for (t=0; t<dset->n; t++) {
 3594     missrow = 1;
 3595     for (i=1; i<dset->v; i++) {
 3596         if (!na(dset->Z[i][t])) {
 3597         missrow = 0;
 3598         break;
 3599         }
 3600     }
 3601     if (missrow) {
 3602         sz = (dset->n - t) * sizeof **dset->Z;
 3603         for (i=1; i<dset->v; i++) {
 3604         memmove(dset->Z[i] + t, dset->Z[i] + t + 1, sz);
 3605         }
 3606         if (dset->S != NULL) {
 3607         free(dset->S[t]);
 3608         for (s=t; s<dset->n - 1; s++) {
 3609             dset->S[s] = dset->S[s+1];
 3610         }
 3611         }
 3612     }
 3613     }
 3614 
 3615     new_n = dset->n - totmiss;
 3616 
 3617     for (i=1; i<dset->v; i++) {
 3618     Zi = realloc(dset->Z[i], new_n * sizeof *Zi);
 3619     if (Zi == NULL) {
 3620         err = E_ALLOC;
 3621     } else {
 3622         dset->Z[i] = Zi;
 3623     }
 3624     }
 3625 
 3626     if (!err && dset->S != NULL) {
 3627     S = realloc(dset->S, new_n * sizeof *S);
 3628     if (S == NULL) {
 3629         err = E_ALLOC;
 3630     } else {
 3631         dset->S = S;
 3632         if (dated_daily_data(dset)) {
 3633         strcpy(dset->stobs, dset->S[0]);
 3634         strcpy(dset->endobs, dset->S[new_n-1]);
 3635         dset->sd0 = get_epoch_day(dset->stobs);
 3636         }
 3637     }
 3638     }
 3639 
 3640     dataset_set_nobs(dset, new_n);
 3641     dset->t1 = t1;
 3642     dset->t2 = t2;
 3643 
 3644     return err;
 3645 }
 3646 
 3647 /**
 3648  * dataset_set_time_series:
 3649  * @dset: pointer to dataset.
 3650  * @pd: time series annual frequency (1 for annual, 4
 3651  * for quarterly or 12 for monthly).
 3652  * @yr0: starting year.
 3653  * @minor0: starting "minor" period, 1-based (quarter or
 3654  * month).
 3655  *
 3656  * Sets time-series properties on @dset: frequency @pd with
 3657  * starting observation @yr0, @minor0. If the data are
 3658  * annual (@pd = 1) then @minor0 is ignored.
 3659 
 3660  * Returns: 0 on success, non-zero code on error.
 3661  */
 3662 
 3663 int dataset_set_time_series (DATASET *dset, int pd,
 3664                  int yr0, int minor0)
 3665 {
 3666     int err = 0;
 3667 
 3668     if (pd != 1 && pd != 4 && pd != 12) {
 3669     err = E_DATA;
 3670     } else if (yr0 < 1) {
 3671     err = E_DATA;
 3672     } else if (pd > 1 && (minor0 < 1 || minor0 > pd)) {
 3673     err = E_DATA;
 3674     } else {
 3675     gchar *stobs = NULL;
 3676 
 3677     dataset_destroy_obs_markers(dset);
 3678     dset->structure = TIME_SERIES;
 3679     dset->pd = pd;
 3680 
 3681     if (pd == 1) {
 3682         stobs = g_strdup_printf("%d", yr0);
 3683     } else if (pd == 4) {
 3684         stobs = g_strdup_printf("%d.%d", yr0, minor0);
 3685     } else {
 3686         stobs = g_strdup_printf("%d.%02d", yr0, minor0);
 3687     }
 3688 
 3689     dset->sd0 = dot_atof(stobs);
 3690     ntolabel(dset->stobs, 0, dset);
 3691     ntolabel(dset->endobs, dset->n - 1, dset);
 3692     g_free(stobs);
 3693     }
 3694 
 3695     return err;
 3696 }
 3697 
 3698 void dataset_clear_sample_record (DATASET *dset)
 3699 {
 3700     if (dset->restriction != NULL) {
 3701     free(dset->restriction);
 3702     dset->restriction = NULL;
 3703     }
 3704 }
 3705 
 3706 /**
 3707  * series_is_discrete:
 3708  * @dset: pointer to dataset.
 3709  * @i: index number of series.
 3710  *
 3711  * Returns: non-zero iff series @i should be treated as discrete.
 3712  */
 3713 
 3714 int series_is_discrete (const DATASET *dset, int i)
 3715 {
 3716     return dset->varinfo[i]->flags & VAR_DISCRETE;
 3717 }
 3718 
 3719 /**
 3720  * series_is_hidden:
 3721  * @dset: pointer to dataset.
 3722  * @i: index number of series.
 3723  *
 3724  * Returns: non-zero iff series @i is hidden.
 3725  */
 3726 
 3727 int series_is_hidden (const DATASET *dset, int i)
 3728 {
 3729     return dset->varinfo[i]->flags & VAR_HIDDEN;
 3730 }
 3731 
 3732 /**
 3733  * series_is_generated:
 3734  * @dset: pointer to dataset.
 3735  * @i: index number of series.
 3736  *
 3737  * Returns: non-zero iff series @i was generated using
 3738  * a formula or transformation function.
 3739  */
 3740 
 3741 int series_is_generated (const DATASET *dset, int i)
 3742 {
 3743     return dset->varinfo[i]->flags & VAR_GENERATED;
 3744 }
 3745 
 3746 /**
 3747  * series_is_listarg:
 3748  * @dset: pointer to dataset.
 3749  * @i: index number of series.
 3750  *
 3751  * Returns: non-zero iff series @i has been marked as
 3752  * belonging to a list argument to a function.
 3753  */
 3754 
 3755 int series_is_listarg (const DATASET *dset, int i)
 3756 {
 3757     return dset->varinfo[i]->flags & VAR_LISTARG;
 3758 }
 3759 
 3760 /**
 3761  * series_is_coded:
 3762  * @dset: pointer to dataset.
 3763  * @i: index number of series.
 3764  *
 3765  * Returns: non-zero iff series @i has been marked as
 3766  * "coded", meaning that its numerical values represent
 3767  * an arbitrary encoding of qualitative characteristics.
 3768  */
 3769 
 3770 int series_is_coded (const DATASET *dset, int i)
 3771 {
 3772     return dset->varinfo[i]->flags & VAR_CODED;
 3773 }
 3774 
 3775 /**
 3776  * series_is_integer_valued:
 3777  * @dset: pointer to dataset.
 3778  * @i: index number of series.
 3779  *
 3780  * Returns: non-zero iff all values in series @i are
 3781  * representable as integers (ignoring missing values).
 3782  */
 3783 
 3784 int series_is_integer_valued (const DATASET *dset, int i)
 3785 {
 3786     const double *x = dset->Z[i];
 3787     int t, n_ok = 0, ret = 1;
 3788 
 3789     for (t=0; t<dset->n; t++) {
 3790     if (!na(x[t])) {
 3791         n_ok++;
 3792         if (x[t] != floor(x[t])) {
 3793         ret = 0;
 3794         break;
 3795         } else if (x[t] > INT_MAX || x[t] < INT_MIN) {
 3796         ret = 0;
 3797         break;
 3798         }
 3799     }
 3800     }
 3801 
 3802     if (n_ok == 0) {
 3803     /* don't let an entirely missing series count as
 3804        "integer-valued"
 3805     */
 3806     ret = 0;
 3807     }
 3808 
 3809     return ret;
 3810 }
 3811 
 3812 /**
 3813  * series_set_flag:
 3814  * @dset: pointer to dataset.
 3815  * @i: index number of series.
 3816  * @flag: flag to set.
 3817  *
 3818  * Sets the given @flag on series @i.
 3819  */
 3820 
 3821 void series_set_flag (DATASET *dset, int i, VarFlags flag)
 3822 {
 3823     if (i > 0 && i < dset->v) {
 3824     dset->varinfo[i]->flags |= flag;
 3825     }
 3826 }
 3827 
 3828 /**
 3829  * series_unset_flag:
 3830  * @dset: pointer to dataset.
 3831  * @i: index number of series.
 3832  * @flag: flag to remove.
 3833  *
 3834  * Unsets the given @flag on series @i.
 3835  */
 3836 
 3837 void series_unset_flag (DATASET *dset, int i, VarFlags flag)
 3838 {
 3839     if (i > 0 && i < dset->v) {
 3840     dset->varinfo[i]->flags &= ~flag;
 3841     }
 3842 }
 3843 
 3844 /**
 3845  * series_get_flags:
 3846  * @dset: pointer to dataset.
 3847  * @i: index number of series.
 3848  *
 3849  * Returns: the flags set series @i.
 3850  */
 3851 
 3852 VarFlags series_get_flags (const DATASET *dset, int i)
 3853 {
 3854     if (i >= 0 && i < dset->v) {
 3855     return dset->varinfo[i]->flags;
 3856     } else {
 3857     return 0;
 3858     }
 3859 }
 3860 
 3861 /**
 3862  * series_zero_flags:
 3863  * @dset: pointer to dataset.
 3864  * @i: index number of series.
 3865  *
 3866  * Sets flags on series @i to zero.
 3867  */
 3868 
 3869 void series_zero_flags (DATASET *dset, int i)
 3870 {
 3871     if (i >= 0 && i < dset->v) {
 3872     dset->varinfo[i]->flags = 0;
 3873     }
 3874 }
 3875 
 3876 /**
 3877  * series_get_label:
 3878  * @dset: pointer to dataset.
 3879  * @i: index number of series.
 3880  *
 3881  * Returns: the descriptive label for series @i.
 3882  */
 3883 
 3884 const char *series_get_label (const DATASET *dset, int i)
 3885 {
 3886     if (i >= 0 && i < dset->v) {
 3887     return dset->varinfo[i]->label;
 3888     } else {
 3889     return NULL;
 3890     }
 3891 }
 3892 
 3893 /**
 3894  * series_get_display_name:
 3895  * @dset: pointer to dataset.
 3896  * @i: index number of series.
 3897  *
 3898  * Returns: the display name for series @i.
 3899  */
 3900 
 3901 const char *series_get_display_name (const DATASET *dset, int i)
 3902 {
 3903     if (i >= 0 && i < dset->v) {
 3904     return dset->varinfo[i]->display_name;
 3905     } else {
 3906     return NULL;
 3907     }
 3908 }
 3909 
 3910 /**
 3911  * series_get_parent_name:
 3912  * @dset: pointer to dataset.
 3913  * @i: index number of series.
 3914  *
 3915  * Returns: the name of the "parent" of series @i
 3916  * (e.g. if series @i is a lag of some other series)
 3917  * or NULL if the series has no parent.
 3918  */
 3919 
 3920 const char *series_get_parent_name (const DATASET *dset, int i)
 3921 {
 3922     if (i > 0 && i < dset->v) {
 3923     if (dset->varinfo[i]->parent[0] != '\0') {
 3924         return dset->varinfo[i]->parent;
 3925     }
 3926     }
 3927 
 3928     return NULL;
 3929 }
 3930 
 3931 /**
 3932  * series_get_parent_id:
 3933  * @dset: pointer to dataset.
 3934  * @i: index number of series.
 3935  *
 3936  * Returns: the ID number of the "parent" of series @i
 3937  * (e.g. if series @i is a lag of some other series)
 3938  * or -1 if the series has no parent.
 3939  */
 3940 
 3941 int series_get_parent_id (const DATASET *dset, int i)
 3942 {
 3943     if (i > 0 && i < dset->v) {
 3944     const char *pname = dset->varinfo[i]->parent;
 3945 
 3946     if (*pname != '\0') {
 3947         return current_series_index(dset, pname);
 3948     }
 3949     }
 3950 
 3951     return -1;
 3952 }
 3953 
 3954 int series_get_lag (const DATASET *dset, int i)
 3955 {
 3956     if (i > 0 && i < dset->v) {
 3957     return dset->varinfo[i]->lag;
 3958     } else {
 3959     return 0;
 3960     }
 3961 }
 3962 
 3963 int series_get_transform (const DATASET *dset, int i)
 3964 {
 3965     if (i > 0 && i < dset->v) {
 3966     return dset->varinfo[i]->transform;
 3967     } else {
 3968     return 0;
 3969     }
 3970 }
 3971 
 3972 /**
 3973  * series_get_compact_method:
 3974  * @dset: pointer to dataset.
 3975  * @i: index number of series.
 3976  *
 3977  * Returns: the compaction method set for series @i.
 3978  */
 3979 
 3980 int series_get_compact_method (const DATASET *dset, int i)
 3981 {
 3982     if (i > 0 && i < dset->v) {
 3983     return dset->varinfo[i]->compact_method;
 3984     } else {
 3985     return 0;
 3986     }
 3987 }
 3988 
 3989 /**
 3990  * series_get_stack_level:
 3991  * @dset: pointer to dataset.
 3992  * @i: index number of series.
 3993  *
 3994  * Returns: the stack level of series @i.
 3995  */
 3996 
 3997 int series_get_stack_level (const DATASET *dset, int i)
 3998 {
 3999     if (i >= 0 && i < dset->v) {
 4000     return dset->varinfo[i]->stack_level;
 4001     } else {
 4002     return 0;
 4003     }
 4004 }
 4005 
 4006 void series_set_mtime (DATASET *dset, int i)
 4007 {
 4008     if (i > 0 && i < dset->v) {
 4009     dset->varinfo[i]->mtime = gretl_monotonic_time();
 4010     }
 4011 }
 4012 
 4013 gint64 series_get_mtime (const DATASET *dset, int i)
 4014 {
 4015     if (i > 0 && i < dset->v) {
 4016     return dset->varinfo[i]->mtime;
 4017     } else {
 4018     return 0;
 4019     }
 4020 }
 4021 
 4022 void series_set_label (DATASET *dset, int i,
 4023                const char *s)
 4024 {
 4025     if (i > 0 && i < dset->v) {
 4026     copy_label(&dset->varinfo[i]->label, s);
 4027     }
 4028 }
 4029 
 4030 void series_set_display_name (DATASET *dset, int i,
 4031                   const char *s)
 4032 {
 4033     if (i > 0 && i < dset->v) {
 4034     char *targ = dset->varinfo[i]->display_name;
 4035 
 4036     *targ = '\0';
 4037     strncat(targ, s, MAXDISP-1);
 4038     }
 4039 }
 4040 
 4041 void series_set_compact_method (DATASET *dset, int i,
 4042                 int method)
 4043 {
 4044     if (i > 0 && i < dset->v) {
 4045     dset->varinfo[i]->compact_method = method;
 4046     }
 4047 }
 4048 
 4049 void series_set_parent (DATASET *dset, int i,
 4050             const char *parent)
 4051 {
 4052     if (i > 0 && i < dset->v) {
 4053     strcpy(dset->varinfo[i]->parent, parent);
 4054     }
 4055 }
 4056 
 4057 void series_set_transform (DATASET *dset, int i,
 4058                int transform)
 4059 {
 4060     if (i > 0 && i < dset->v) {
 4061     dset->varinfo[i]->transform = transform;
 4062     }
 4063 }
 4064 
 4065 void series_set_lag (DATASET *dset, int i, int lag)
 4066 {
 4067     if (i > 0 && i < dset->v) {
 4068     dset->varinfo[i]->lag = lag;
 4069     }
 4070 }
 4071 
 4072 void series_set_stack_level (DATASET *dset, int i, int level)
 4073 {
 4074     if (i > 0 && i < dset->v) {
 4075     dset->varinfo[i]->stack_level = level;
 4076     }
 4077 }
 4078 
 4079 void series_increment_stack_level (DATASET *dset, int i)
 4080 {
 4081     if (i > 0 && i < dset->v) {
 4082     dset->varinfo[i]->stack_level += 1;
 4083     }
 4084 }
 4085 
 4086 void series_decrement_stack_level (DATASET *dset, int i)
 4087 {
 4088     if (i > 0 && i < dset->v) {
 4089     dset->varinfo[i]->stack_level -= 1;
 4090     }
 4091 }
 4092 
 4093 void series_delete_metadata (DATASET *dset, int i)
 4094 {
 4095     if (i > 0 && i < dset->v &&
 4096     dset->varinfo != NULL &&
 4097     dset->varinfo[i] != NULL) {
 4098     dset->varinfo[i]->lag = 0;
 4099     dset->varinfo[i]->transform = 0;
 4100     dset->varinfo[i]->parent[0] = '\0';
 4101     }
 4102 }
 4103 
 4104 void series_ensure_level_zero (DATASET *dset)
 4105 {
 4106     if (dset != NULL) {
 4107     int i, n = 0;
 4108 
 4109     for (i=1; i<dset->v; i++) {
 4110         if (dset->varinfo[i]->stack_level > 0) {
 4111         dset->varinfo[i]->stack_level = 0;
 4112         n++;
 4113         }
 4114     }
 4115 #if 0
 4116     if (n > 0) {
 4117         fprintf(stderr, "Unauthorized access to series detected!\n");
 4118     }
 4119 #endif
 4120     }
 4121 }
 4122 
 4123 void series_attach_string_table (DATASET *dset, int i, void *ptr)
 4124 {
 4125     if (dset != NULL && i > 0 && i < dset->v) {
 4126     series_set_discrete(dset, i, 1);
 4127     dset->varinfo[i]->st = ptr;
 4128     }
 4129 }
 4130 
 4131 void series_destroy_string_table (DATASET *dset, int i)
 4132 {
 4133     if (dset != NULL && i > 0 && i < dset->v) {
 4134     series_table_destroy(dset->varinfo[i]->st);
 4135     dset->varinfo[i]->st = NULL;
 4136     }
 4137 }
 4138 
 4139 /**
 4140  * is_string_valued:
 4141  * @dset: pointer to dataset.
 4142  * @i: index number of series.
 4143  *
 4144  * Returns: 1 if series @i has a table of string values
 4145  * (that is, a mapping from numerical values to associated
 4146  * string values), otherwise 0.
 4147  */
 4148 
 4149 int is_string_valued (const DATASET *dset, int i)
 4150 {
 4151     if (dset != NULL && i > 0 && i < dset->v) {
 4152     return dset->varinfo[i]->st != NULL;
 4153     } else {
 4154     return 0;
 4155     }
 4156 }
 4157 
 4158 /**
 4159  * series_get_string_table:
 4160  * @dset: pointer to dataset.
 4161  * @i: index number of series.
 4162  *
 4163  * Returns: the string table attched to series @i or NULL if
 4164  * there is no such table.
 4165  */
 4166 
 4167 series_table *series_get_string_table (const DATASET *dset, int i)
 4168 {
 4169     if (dset != NULL && i > 0 && i < dset->v) {
 4170     return dset->varinfo[i]->st;
 4171     } else {
 4172     return NULL;
 4173     }
 4174 }
 4175 
 4176 /**
 4177  * series_get_string_for_obs:
 4178  * @dset: pointer to dataset.
 4179  * @i: index number of series.
 4180  * @t: 0-based index of observation.
 4181  *
 4182  * Returns: the string associated with the numerical value of
 4183  * series @i at observation @t, or NULL if there is no such string.
 4184  */
 4185 
 4186 const char *series_get_string_for_obs (const DATASET *dset, int i,
 4187                        int t)
 4188 {
 4189     const char *ret = NULL;
 4190 
 4191     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4192     ret = series_table_get_string(dset->varinfo[i]->st,
 4193                       dset->Z[i][t]);
 4194     }
 4195 
 4196     return ret;
 4197 }
 4198 
 4199 /**
 4200  * series_get_string_for_value:
 4201  * @dset: pointer to dataset.
 4202  * @i: index number of series.
 4203  * @val: the value to look up.
 4204  *
 4205  * Returns: the string associated with numerical value @val of
 4206  * series @i, or NULL if there is no such string.
 4207  */
 4208 
 4209 const char *series_get_string_for_value (const DATASET *dset, int i,
 4210                      double val)
 4211 {
 4212     const char *ret = NULL;
 4213 
 4214     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4215     ret = series_table_get_string(dset->varinfo[i]->st, val);
 4216     }
 4217 
 4218     return ret;
 4219 }
 4220 
 4221 /**
 4222  * series_set_string_val:
 4223  * @dset: pointer to dataset.
 4224  * @i: index number of series.
 4225  * @t: 0-based index of observation.
 4226  * @s: the string value to set.
 4227  *
 4228  * Attempts to set the string value for observation @t of series @i
 4229  * to @s. This will fail if the series in question does not have
 4230  * an associated table of string values.
 4231  *
 4232  * Returns: 0 on success, non-zero code on error.
 4233  */
 4234 
 4235 int series_set_string_val (DATASET *dset, int i, int t, const char *s)
 4236 {
 4237     int err = 0;
 4238 
 4239     if (i <= 0 || i >= dset->v) {
 4240     err = E_DATA;
 4241     } else if (dset->varinfo[i]->st == NULL) {
 4242     err = E_TYPES;
 4243     } else {
 4244     series_table *st = dset->varinfo[i]->st;
 4245     double x = series_table_get_value(st, s);
 4246 
 4247     if (na(x)) {
 4248         int k = series_table_add_string(st, s);
 4249 
 4250         if (k < 0) {
 4251         err = E_ALLOC;
 4252         } else {
 4253         dset->Z[i][t] = k;
 4254         }
 4255     } else {
 4256         dset->Z[i][t] = x;
 4257     }
 4258     }
 4259 
 4260     return err;
 4261 }
 4262 
 4263 /**
 4264  * string_series_assign_value:
 4265  * @dset: pointer to dataset.
 4266  * @i: index number of string-valued series.
 4267  * @t: 0-based index of observation.
 4268  * @x: the numeric value to set.
 4269  *
 4270  * Attempts to set the value for observation @t of series @i
 4271  * to @x. This will fail if the @x falls outside of the range
 4272  * of values supported by the string table for the series.
 4273  *
 4274  * Returns: 0 on success, non-zero code on error.
 4275  */
 4276 
 4277 int string_series_assign_value (DATASET *dset, int i,
 4278                 int t, double x)
 4279 {
 4280     series_table *st = NULL;
 4281     int err = 0;
 4282 
 4283     if (i <= 0 || i >= dset->v) {
 4284     err = E_DATA;
 4285     } else if (na(x)) {
 4286     dset->Z[i][t] = x;
 4287     } else if (x != floor(x)) {
 4288     err = E_TYPES;
 4289     } else if ((st = dset->varinfo[i]->st) == NULL) {
 4290     err = E_TYPES;
 4291     } else if (series_table_get_string(st, x) == NULL) {
 4292     err = E_DATA;
 4293     } else {
 4294     dset->Z[i][t] = x;
 4295     }
 4296 
 4297     return err;
 4298 }
 4299 
 4300 /**
 4301  * series_decode_string:
 4302  * @dset: pointer to dataset.
 4303  * @i: index number of series.
 4304  * @s: string to decode.
 4305  *
 4306  * Returns: the numerical value associated with the string
 4307  * @s for series @i, or #NADBL if there's no such value.
 4308  */
 4309 
 4310 double series_decode_string (const DATASET *dset, int i, const char *s)
 4311 {
 4312     double ret = NADBL;
 4313 
 4314     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4315     ret = series_table_get_value(dset->varinfo[i]->st, s);
 4316     }
 4317 
 4318     return ret;
 4319 }
 4320 
 4321 /**
 4322  * series_get_string_vals:
 4323  * @dset: pointer to dataset.
 4324  * @i: index number of series.
 4325  * @n_strs: location to receive the number of strings, or NULL.
 4326  * @subsample: non-zero to restrict to current sample range.
 4327  *
 4328  * Returns: the array of strings associated with distinct numerical
 4329  * values of series @i, or NULL if there's no such array. The returned
 4330  * array should not be modified in any way; copy the strings first if
 4331  * you need to modify them.
 4332  */
 4333 
 4334 char **series_get_string_vals (const DATASET *dset, int i,
 4335                    int *n_strs, int subsample)
 4336 {
 4337     char **strs = NULL;
 4338     int n = 0;
 4339 
 4340     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
 4341     strs = series_table_get_strings(dset->varinfo[i]->st, &n);
 4342     }
 4343 
 4344     if (strs != NULL && subsample && dataset_is_subsampled(dset)) {
 4345     static char **substrs = NULL;
 4346     const double *x = dset->Z[i] + dset->t1;
 4347     int T = dset->t2 - dset->t1 + 1;
 4348     gretl_matrix *valid;
 4349     int err = 0;
 4350 
 4351     if (substrs != NULL) {
 4352         free(substrs);
 4353         substrs = NULL;
 4354     }
 4355     valid = gretl_matrix_values(x, T, OPT_NONE, &err);
 4356     if (err) {
 4357         strs = NULL;
 4358         n = 0;
 4359     } else {
 4360         int j, k, nv = valid->rows;
 4361 
 4362         substrs = strings_array_new(nv);
 4363         for (j=0; j<nv; j++) {
 4364         k = gretl_vector_get(valid, j) - 1;
 4365         substrs[j] = strs[k];
 4366         }
 4367         strs = substrs;
 4368         n = nv;
 4369         gretl_matrix_free(valid);
 4370     }
 4371     }
 4372 
 4373     if (n_strs != NULL) {
 4374     *n_strs = n;
 4375     }
 4376 
 4377     return strs;
 4378 }
 4379 
 4380 /**
 4381  * series_get_string_width:
 4382  * @dset: pointer to dataset.
 4383  * @i: index number of series.
 4384  *
 4385  * Returns: the maximum of (a) the number of characters in the
 4386  * name of series @i and (b) the number of bytes in the longest
 4387  * "string value" attached to series @i, if applicable; or 0
 4388  * if @i is not a valid series index.
 4389  */
 4390 
 4391 int series_get_string_width (const DATASET *dset, int i)
 4392 {
 4393     int n = 0;
 4394 
 4395     if (i > 0 && i < dset->v) {
 4396     n = strlen(dset->varname[i]);
 4397     if (dset->varinfo[i]->st != NULL) {
 4398         char **S;
 4399         int j, ns, m;
 4400 
 4401         S = series_table_get_strings(dset->varinfo[i]->st, &ns);
 4402         for (j=0; j<ns; j++) {
 4403         m = g_utf8_strlen(S[j], -1);
 4404         if (m > n) {
 4405             n = m;
 4406         }
 4407         }
 4408     }
 4409     }
 4410 
 4411     return n;
 4412 }
 4413 
 4414 /**
 4415  * steal_string_table:
 4416  * @l_dset: pointer to recipient dataset.
 4417  * @lvar: index number of target series.
 4418  * @r_dset: pointer to donor dataset.
 4419  * @rvar: index number of source series.
 4420  *
 4421  * Detaches the string table from @rvar in @r_dset and attaches it
 4422  * to @lvar in @l_dset,
 4423  *
 4424  * Returns: 0 on success, non-zero code on error.
 4425  */
 4426 
 4427 int steal_string_table (DATASET *l_dset, int lvar,
 4428             DATASET *r_dset, int rvar)
 4429 {
 4430     if (l_dset != r_dset || lvar != rvar) {
 4431     l_dset->varinfo[lvar]->st = r_dset->varinfo[rvar]->st;
 4432     r_dset->varinfo[rvar]->st = NULL;
 4433     series_set_discrete(l_dset, lvar, 1);
 4434     }
 4435 
 4436     return 0;
 4437 }
 4438 
 4439 /**
 4440  * merge_string_tables:
 4441  * @l_dset: pointer to current dataset.
 4442  * @lvar: index number of target series.
 4443  * @r_dset: pointer to dataset to be appended.
 4444  * @rvar: index number of source series.
 4445  *
 4446  * Translates the encoding of the string-values of series @rvar
 4447  * in @r_dset to that of series @lvar in @l_dset, adding extra
 4448  * strings as needed.
 4449  *
 4450  * Returns: 0 on success, non-zero code on error.
 4451  */
 4452 
 4453 int merge_string_tables (DATASET *l_dset, int lvar,
 4454              DATASET *r_dset, int rvar)
 4455 {
 4456     series_table *lst = l_dset->varinfo[lvar]->st;
 4457     double dx, *x = r_dset->Z[rvar];
 4458     const char *sr;
 4459     int t, idx, err = 0;
 4460 
 4461     for (t=0; t<r_dset->n && !err; t++) {
 4462     if (na(x[t])) {
 4463         continue;
 4464     }
 4465     /* get the right-hand side string associated with x[t] */
 4466     sr = series_get_string_for_value(r_dset, rvar, x[t]);
 4467     /* and look up its numeric code on the left */
 4468     dx = series_decode_string(l_dset, lvar, sr);
 4469     if (!na(dx)) {
 4470         /* got a match: apply the code from @lst */
 4471         x[t] = dx;
 4472     } else {
 4473         /* no match: we need to add a string to @lst */
 4474         idx = series_table_add_string(lst, sr);
 4475         if (idx < 0) {
 4476         err = E_ALLOC;
 4477         } else {
 4478         x[t] = (double) idx;
 4479         }
 4480     }
 4481     }
 4482 
 4483     return err;
 4484 }
 4485 
 4486 static void maybe_adjust_label (DATASET *dset, int v,
 4487                 char **S, int ns)
 4488 {
 4489     int i, len = 3 * ns; /* "=" + ", " */
 4490     char *tmp;
 4491 
 4492     for (i=0; i<ns; i++) {
 4493     len += strlen(S[i]) + 1 + floor(log10(1.0 + i));
 4494     }
 4495 
 4496     /* let's not create a super-long series label */
 4497     if (len > 255) {
 4498     return;
 4499     }
 4500 
 4501     tmp = calloc(len + 1, 1);
 4502 
 4503     if (tmp != NULL) {
 4504     char bit[16];
 4505 
 4506     for (i=0; i<ns; i++) {
 4507         sprintf(bit, "%d=", i+1);
 4508         strcat(tmp, bit);
 4509         strcat(tmp, S[i]);
 4510         if (i < ns - 1) {
 4511         strcat(tmp, ", ");
 4512         }
 4513     }
 4514     copy_label(&dset->varinfo[v]->label, tmp);
 4515     free(tmp);
 4516     }
 4517 }
 4518 
 4519 /* Encode the strings in @a into numerical values in series
 4520    @v of dataset @dset. "Return" via @pU the array of unique
 4521    string values and via @pnu the number of such values.
 4522 */
 4523 
 4524 static int alt_set_strvals (DATASET *dset, int v, gretl_array *a,
 4525                 char ***pU, int *pnu)
 4526 {
 4527     char **S, **U = NULL;
 4528     double *x = dset->Z[v];
 4529     int i, pos, ns, nu = 0;
 4530     int err = 0;
 4531 
 4532     S = gretl_array_get_strings(a, &ns);
 4533 
 4534     for (i=0; i<ns && !err; i++) {
 4535     err = strings_array_add_uniq(&U, &nu, S[i], &pos);
 4536     if (!err) {
 4537         x[i] = pos + 1;
 4538     }
 4539     }
 4540 
 4541     if (!err) {
 4542     *pU = U;
 4543     *pnu = nu;
 4544     } else if (U != NULL) {
 4545     strings_array_free(U, nu);
 4546     }
 4547 
 4548     return err;
 4549 }
 4550 
 4551 static int alt_strvals_case (DATASET *dset, int v, gretl_array *a)
 4552 {
 4553     double *x = dset->Z[v];
 4554     int i;
 4555 
 4556     if (gretl_array_get_length(a) != dset->n) {
 4557     return 0;
 4558     }
 4559 
 4560     for (i=0; i<dset->n; i++) {
 4561     if (x[i] != 0) {
 4562         return 0;
 4563     }
 4564     }
 4565 
 4566     return 1;
 4567 }
 4568 
 4569 /* here we're trying to set strings values on a series from
 4570    scratch */
 4571 
 4572 int series_set_string_vals (DATASET *dset, int i, void *ptr)
 4573 {
 4574     gretl_array *a = ptr;
 4575     gretl_matrix *vals = NULL;
 4576     char **S = NULL;
 4577     int ns = 0;
 4578     int err = 0;
 4579 
 4580     if (a == NULL || dset == NULL || i < 1 || i >= dset->v) {
 4581     return E_DATA;
 4582     }
 4583 
 4584     if (alt_strvals_case(dset, i, a)) {
 4585     err = alt_set_strvals(dset, i, a, &S, &ns);
 4586     if (err) {
 4587         return err;
 4588     } else {
 4589         goto do_strtable;
 4590     }
 4591     }
 4592 
 4593     /* get sorted vector of unique values */
 4594     vals = gretl_matrix_values(dset->Z[i], dset->n, OPT_S, &err);
 4595 
 4596     if (!err) {
 4597     int i, nvals = gretl_vector_get_length(vals);
 4598     double x0 = gretl_vector_get(vals, 0);
 4599     double x1 = gretl_vector_get(vals, nvals - 1);
 4600 
 4601     if (x0 < 1.0) {
 4602         gretl_errmsg_set("The minimum value of the target series "
 4603                  "must be >= 1");
 4604         err = E_DATA;
 4605     } else {
 4606         /* the values should all be integers */
 4607         for (i=0; i<nvals && !err; i++) {
 4608         x1 = gretl_vector_get(vals, i);
 4609         if (x1 != floor(x1)) {
 4610             gretl_errmsg_set("The series values must be integers");
 4611             err = E_DATA;
 4612         }
 4613         }
 4614     }
 4615 
 4616     if (!err) {
 4617         S = gretl_array_get_stringify_strings(a, (int) x1, &ns, &err);
 4618     }
 4619 
 4620     if (!err) {
 4621         /* the strings should all be UTF-8 */
 4622         for (i=0; i<ns && !err; i++) {
 4623         if (!g_utf8_validate(S[i], -1, NULL)) {
 4624             gretl_errmsg_sprintf("String %d is not valid UTF-8", i+1);
 4625             err = E_DATA;
 4626         }
 4627         }
 4628     }
 4629     }
 4630 
 4631  do_strtable:
 4632 
 4633     if (!err) {
 4634     series_table *st = series_table_new(S, ns, &err);
 4635 
 4636     if (!err) {
 4637         if (dset->varinfo[i]->st != NULL) {
 4638         /* remove any pre-existing table */
 4639         series_table_destroy(dset->varinfo[i]->st);
 4640         }
 4641         series_set_discrete(dset, i, 1);
 4642         dset->varinfo[i]->st = st;
 4643         maybe_adjust_label(dset, i, S, ns);
 4644     }
 4645     }
 4646 
 4647     if (err && S != NULL && ns > 0) {
 4648     strings_array_free(S, ns);
 4649     }
 4650 
 4651     gretl_matrix_free(vals);
 4652 
 4653     return err;
 4654 }
 4655 
 4656 /* The pre-checked case: we know that series @i is suitable
 4657    for stringifying, and that @S contains the right number of
 4658    strings.
 4659 */
 4660 
 4661 int series_set_string_vals_direct (DATASET *dset, int i,
 4662                    char **S, int ns)
 4663 {
 4664     int err = 0;
 4665     series_table *st = series_table_new(S, ns, &err);
 4666 
 4667     if (!err) {
 4668     if (dset->varinfo[i]->st != NULL) {
 4669         /* remove any pre-existing table */
 4670         series_table_destroy(dset->varinfo[i]->st);
 4671     }
 4672     series_set_discrete(dset, i, 1);
 4673     dset->varinfo[i]->st = st;
 4674     maybe_adjust_label(dset, i, S, ns);
 4675     }
 4676 
 4677     if (err && S != NULL && ns > 0) {
 4678     strings_array_free(S, ns);
 4679     }
 4680 
 4681     return err;
 4682 }
 4683 
 4684 /**
 4685  * series_recode_strings:
 4686  * @dset: pointer to dataset.
 4687  * @v: index number of target string-valued series.
 4688  * @opt: may contain OPT_P (see below).
 4689  * @changed: location to receive "changed" feedback, or NULL.
 4690  *
 4691  * This function "trims" the array of string values associated
 4692  * with series @v so that it contains no redundant elements --
 4693  * that is, values of which there is no instance in the
 4694  * current sample -- and resets the numeric codes for the
 4695  * strings if necessary.
 4696  *
 4697  * By default the original "series_table" attached to series @v
 4698  * is destroyed, but if @opt contains OPT_P it is replaced but
 4699  * not freed; this make sense only if another pointer to the
 4700  * original table exists.
 4701  *
 4702  * If it happens that the current sample contains
 4703  * instances of all the strings in the full dataset, this
 4704  * function will not actually make any changes to @dset. The
 4705  * @changed argument provides a means of determining
 4706  * whether any change has been made.
 4707  *
 4708  * Returns: 0 on success, non-zero code on error.
 4709  */
 4710 
 4711 int series_recode_strings (DATASET *dset, int v, gretlopt opt,
 4712                int *changed)
 4713 {
 4714     double *x = dset->Z[v] + dset->t1;
 4715     int n = sample_size(dset);
 4716     gretl_matrix *vals = NULL;
 4717     gretl_matrix *repl = NULL;
 4718     char **S = NULL;
 4719     const char *si;
 4720     int ns, nu = 0;
 4721     int err = 0;
 4722 
 4723     if (changed != NULL) {
 4724     *changed = 0;
 4725     }
 4726 
 4727     ns = series_table_get_n_strings(dset->varinfo[v]->st);
 4728     vals = gretl_matrix_values(x, n, OPT_NONE, &err);
 4729 
 4730     if (!err) {
 4731     /* number of unique values */
 4732     nu = vals->rows;
 4733     if (nu == ns) {
 4734         /* nothing to be done */
 4735         gretl_matrix_free(vals);
 4736         return 0;
 4737     }
 4738     repl = gretl_zero_matrix_new(nu, 1);
 4739     S = strings_array_new(nu);
 4740     if (repl == NULL || S == NULL) {
 4741         free(S);
 4742         err = E_ALLOC;
 4743     }
 4744     }
 4745 
 4746     if (!err) {
 4747     int i;
 4748 
 4749     for (i=0; i<nu; i++) {
 4750         si = series_get_string_for_value(dset, v, vals->val[i]);
 4751         S[i] = gretl_strdup(si);
 4752         repl->val[i] = i + 1;
 4753     }
 4754 
 4755     substitute_values(x, x, n, vals->val, nu, repl->val, nu);
 4756 
 4757     if (!(opt & OPT_P)) {
 4758         series_table_destroy(dset->varinfo[v]->st);
 4759     }
 4760     /* the series table takes ownership of @S */
 4761     dset->varinfo[v]->st = series_table_new(S, nu, &err);
 4762 
 4763     if (changed != NULL) {
 4764         *changed = 1;
 4765     }
 4766     }
 4767 
 4768     gretl_matrix_free(vals);
 4769     gretl_matrix_free(repl);
 4770 
 4771     return err;
 4772 }
 4773 
 4774 int set_panel_groups_name (DATASET *dset, const char *vname)
 4775 {
 4776     if (dset->pangrps != NULL) {
 4777     free(dset->pangrps);
 4778     }
 4779 
 4780     dset->pangrps = gretl_strdup(vname);
 4781 
 4782     return (dset->pangrps == NULL)? E_ALLOC : 0;
 4783 }
 4784 
 4785 /* This should be called only after the "group names"
 4786    property of @dset has been (recently) validated, via
 4787    panel_group_names_ok().
 4788 */
 4789 
 4790 const char *get_panel_group_name (const DATASET *dset, int obs)
 4791 {
 4792     const char *s = NULL;
 4793 
 4794     if (dataset_is_panel(dset) && dset->pangrps != NULL &&
 4795     obs >= 0 && obs < dset->n) {
 4796     int v = current_series_index(dset, dset->pangrps);
 4797     series_table *st;
 4798 
 4799     if ((st = series_get_string_table(dset, v)) != NULL) {
 4800         s = series_table_get_string(st, dset->Z[v][obs]);
 4801     }
 4802     }
 4803 
 4804     return (s != NULL)? s : "??";
 4805 }
 4806 
 4807 int panel_group_names_ok (const DATASET *dset, int maxlen)
 4808 {
 4809     int ok = 0;
 4810 
 4811     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4812     int ns, v = current_series_index(dset, dset->pangrps);
 4813 
 4814     if (v > 0 && v < dset->v) {
 4815         char **S = series_get_string_vals(dset, v, &ns, 0);
 4816 
 4817         if (S != NULL && ns >= dset->n / dset->pd) {
 4818         ok = 1; /* provisional */
 4819         if (maxlen > 0) {
 4820             int i;
 4821 
 4822             for (i=0; i<ns; i++) {
 4823             if (strlen(S[i]) > maxlen) {
 4824                 ok = 0;
 4825                 break;
 4826             }
 4827             }
 4828         }
 4829         }
 4830     }
 4831     }
 4832 
 4833     return ok;
 4834 }
 4835 
 4836 const char *panel_group_names_varname (const DATASET *dset)
 4837 {
 4838     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4839     int ns, v = current_series_index(dset, dset->pangrps);
 4840 
 4841     if (v > 0 && v < dset->v) {
 4842         char **S = series_get_string_vals(dset, v, &ns, 0);
 4843 
 4844         if (S != NULL) {
 4845         int ng = dset->n / dset->pd;
 4846 
 4847         if (ns >= ng) {
 4848             return dset->pangrps;
 4849         }
 4850         }
 4851     }
 4852     }
 4853 
 4854     return NULL;
 4855 }
 4856 
 4857 int is_panel_group_names_series (const DATASET *dset, int v)
 4858 {
 4859     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
 4860     return v == current_series_index(dset, dset->pangrps);
 4861     } else {
 4862     return 0;
 4863     }
 4864 }
 4865 
 4866 static int suitable_group_names_series (const DATASET *dset,
 4867                     int maxlen,
 4868                     int exclude)
 4869 {
 4870     int i, vfound = 0;
 4871 
 4872     for (i=1; i<dset->v && !vfound; i++) {
 4873     if (i == exclude) {
 4874         continue;
 4875     }
 4876     if (is_string_valued(dset, i)) {
 4877         int ns = 0;
 4878         char **S = series_get_string_vals(dset, i, &ns, 0);
 4879 
 4880         if (S != NULL && ns >= dset->n / dset->pd) {
 4881         const char *sbak = NULL;
 4882         int t, u, ubak = -1;
 4883         int fail = 0;
 4884 
 4885         for (t=dset->t1; t<=dset->t2 && !fail; t++) {
 4886             const char *st = series_get_string_for_obs(dset, i, t);
 4887 
 4888             u = t / dset->pd;
 4889             if (st == NULL || sbak == NULL) {
 4890             fail = 1;
 4891             } else if (u == ubak && strcmp(st, sbak)) {
 4892             /* same unit, different label: no */
 4893             fail = 1;
 4894             } else if (ubak >= 0 && u != ubak && !strcmp(st, sbak)) {
 4895             /* different unit, same label: no */
 4896             fail = 2;
 4897             }
 4898             if (!fail && maxlen > 0 && strlen(st) > maxlen) {
 4899             fail = 1;
 4900             }
 4901             ubak = u;
 4902             sbak = st;
 4903         }
 4904         if (!fail) {
 4905             vfound = i;
 4906         }
 4907         }
 4908     }
 4909     }
 4910 
 4911     return vfound;
 4912 }
 4913 
 4914 /* For plotting purposes, try to get labels for panel groups,
 4915    subject to the constraint that they should be no longer
 4916    than @maxlen. If successful, this will return an array of
 4917    at least N strings, where N is the cross-sectional
 4918    dimension of the panel. This array should be treated as
 4919    read-only.
 4920 */
 4921 
 4922 series_table *get_panel_group_table (const DATASET *dset,
 4923                      int maxlen, int *pv)
 4924 {
 4925     series_table *st = NULL;
 4926     int vpg = 0;
 4927 
 4928     if (dset->pangrps != NULL) {
 4929     vpg = current_series_index(dset, dset->pangrps);
 4930     }
 4931 
 4932     /* first see if we have valid group labels set explicitly */
 4933     if (vpg > 0 && panel_group_names_ok(dset, maxlen)) {
 4934     st = dset->varinfo[vpg]->st;
 4935     }
 4936 
 4937     if (st == NULL) {
 4938     /* can we find a suitable string-valued series? */
 4939     int altv = suitable_group_names_series(dset, maxlen, vpg);
 4940 
 4941     if (altv > 0) {
 4942         vpg = altv;
 4943         st = dset->varinfo[vpg]->st;
 4944     }
 4945     }
 4946 
 4947     *pv = (st != NULL)? vpg : 0;
 4948 
 4949     return st;
 4950 }
 4951 
 4952 int is_dataset_series (const DATASET *dset, const double *x)
 4953 {
 4954     int i;
 4955 
 4956     for (i=dset->v-1; i>=0; i--) {
 4957     if (x == dset->Z[i]) {
 4958         return 1;
 4959     }
 4960     }
 4961 
 4962     return 0;
 4963 }
 4964 
 4965 static int effective_daily_skip (int delta, int wd, int pd)
 4966 {
 4967     int k, skip = delta - 1;
 4968 
 4969     if (pd < 7) {
 4970     skip = 0;
 4971     for (k=1; k<delta; k++) {
 4972         wd = (wd == 0)? 6 : wd - 1;
 4973         if (pd == 6) {
 4974         skip += (wd != 0);
 4975         } else {
 4976         skip += (wd != 0 && wd != 6);
 4977         }
 4978     }
 4979     }
 4980 
 4981     return skip;
 4982 }
 4983 
 4984 /* If we get here we've already checked that @dset is dated daily
 4985    data, and that @pd is a valid daily periodicity greater than or
 4986    equal to the current dset->pd.
 4987 */
 4988 
 4989 static int pad_daily_data (DATASET *dset, int pd, PRN *prn)
 4990 {
 4991     DATASET *bigset = NULL;
 4992     char datestr[OBSLEN];
 4993     guint32 ed, ed0 = 0, edbak = 0;
 4994     int wd, skip, totskip = 0;
 4995     int t, err = 0;
 4996 
 4997     for (t=0; t<dset->n; t++) {
 4998     ntolabel(datestr, t, dset);
 4999     if (t == 0) {
 5000         ed0 = edbak = get_epoch_day(datestr);
 5001     } else {
 5002         wd = weekday_from_date(datestr);
 5003         ed = get_epoch_day(datestr);
 5004         skip = effective_daily_skip(ed - edbak, wd, pd);
 5005         totskip += skip;
 5006         edbak = ed;
 5007     }
 5008     }
 5009 
 5010     if (totskip == 0) {
 5011     pprintf(prn, "Dataset is already complete for %d-day calendar", pd);
 5012     return 0;
 5013     }
 5014 
 5015     /* We pass OPT_R here to avoid allocating varnames in @bigset,
 5016        since we're going to preserve these from @dset.
 5017     */
 5018     bigset = real_create_new_dataset(dset->v, dset->n + totskip, OPT_R);
 5019 
 5020     if (bigset == NULL) {
 5021     err = E_ALLOC;
 5022     } else {
 5023     int i, s = 0;
 5024 
 5025     edbak = ed0;
 5026 
 5027     for (t=0; t<dset->n; t++) {
 5028         if (t > 0) {
 5029         ntolabel(datestr, t, dset);
 5030         wd = weekday_from_date(datestr);
 5031         ed = get_epoch_day(datestr);
 5032         s += 1 + effective_daily_skip(ed - edbak, wd, pd);
 5033         edbak = ed;
 5034         }
 5035         for (i=1; i<dset->v; i++) {
 5036         bigset->Z[i][s] = dset->Z[i][t];
 5037         }
 5038     }
 5039 
 5040     bigset->varname = dset->varname;
 5041     bigset->varinfo = dset->varinfo;
 5042     bigset->descrip = dset->descrip;
 5043 
 5044     bigset->pd = pd;
 5045     bigset->structure = TIME_SERIES;
 5046     bigset->sd0 = (double) ed0;
 5047     strcpy(bigset->stobs, dset->stobs);
 5048     ntolabel(bigset->endobs, bigset->n - 1, bigset);
 5049 
 5050     dset->varname = NULL;
 5051     dset->varinfo = NULL;
 5052     dset->descrip = NULL;
 5053     dataset_destroy_obs_markers(dset);
 5054     free_Z(dset);
 5055     clear_datainfo(dset, CLEAR_SUBSAMPLE);
 5056 
 5057     *dset = *bigset;
 5058     free(bigset); /* avoid leaking memory */
 5059     }
 5060 
 5061     return err;
 5062 }
 5063 
 5064 /* MIDAS-related functions */
 5065 
 5066 /* postprocess: fill missing slots in daily data array
 5067    with the period (month or quarter) average
 5068    (FIXME support interpolation as an option?)
 5069 */
 5070 
 5071 int postprocess_daily_data (DATASET *dset, const int *list)
 5072 {
 5073     double *x, xbar, xsum;
 5074     int t, i, n_ok, n_miss;
 5075     int err = 0;
 5076 
 5077     for (t=dset->t1; t<=dset->t2; t++) {
 5078     xsum = 0.0;
 5079     n_ok = n_miss = 0;
 5080     for (i=1; i<=list[0]; i++) {
 5081         x = dset->Z[list[i]];
 5082         if (na(x[t])) {
 5083         n_miss++;
 5084         } else {
 5085         xsum += x[t];
 5086         n_ok++;
 5087         }
 5088     }
 5089     if (n_miss > 0 && n_ok > 0) {
 5090         xbar = xsum / n_ok;
 5091         for (i=1; i<=list[0]; i++) {
 5092         x = dset->Z[list[i]];
 5093         if (na(x[t])) {
 5094             x[t] = xbar;
 5095         }
 5096         }
 5097     }
 5098     }
 5099 
 5100     return err;
 5101 }
 5102 
 5103 int series_get_midas_period (const DATASET *dset, int i)
 5104 {
 5105     if (i > 0 && i < dset->v) {
 5106     return dset->varinfo[i]->midas_period;
 5107     }
 5108 
 5109     return 0;
 5110 }
 5111 
 5112 void series_set_midas_period (const DATASET *dset, int i,
 5113                   int period)
 5114 {
 5115     if (i > 0 && i < dset->v) {
 5116     dset->varinfo[i]->midas_period = period;
 5117     }
 5118 }
 5119 
 5120 int series_get_midas_freq (const DATASET *dset, int i)
 5121 {
 5122     if (i > 0 && i < dset->v) {
 5123     return dset->varinfo[i]->midas_freq;
 5124     }
 5125 
 5126     return 0;
 5127 }
 5128 
 5129 int series_set_midas_freq (const DATASET *dset, int i,
 5130                int freq)
 5131 {
 5132     int err = 0;
 5133 
 5134     if (i > 0 && i < dset->v) {
 5135     if (freq < 5 || freq > 12) {
 5136         err = E_DATA;
 5137     } else {
 5138         dset->varinfo[i]->midas_freq = freq;
 5139     }
 5140     } else {
 5141     err = E_DATA;
 5142     }
 5143 
 5144     return err;
 5145 }
 5146 
 5147 int series_is_midas_anchor (const DATASET *dset, int i)
 5148 {
 5149     if (i > 0 && i < dset->v &&
 5150     (dset->varinfo[i]->flags & VAR_HFANCHOR)) {
 5151     return dset->varinfo[i]->midas_period;
 5152     }
 5153 
 5154     return 0;
 5155 }
 5156 
 5157 void series_set_midas_anchor (const DATASET *dset, int i)
 5158 {
 5159     if (i > 0 && i < dset->v) {
 5160     dset->varinfo[i]->flags |= VAR_HFANCHOR;
 5161     }
 5162 }
 5163 
 5164 /* end MIDAS-related functions */
 5165 
 5166 int series_get_orig_pd (const DATASET *dset, int i)
 5167 {
 5168     if (i > 0 && i < dset->v) {
 5169     return dset->varinfo[i]->orig_pd;
 5170     } else {
 5171     return 0;
 5172     }
 5173 }
 5174 
 5175 void series_set_orig_pd (const DATASET *dset, int i, int pd)
 5176 {
 5177     if (i > 0 && i < dset->v) {
 5178     dset->varinfo[i]->orig_pd = pd;
 5179     }
 5180 }
 5181 
 5182 void series_unset_orig_pd (const DATASET *dset, int i)
 5183 {
 5184     if (i > 0 && i < dset->v) {
 5185     dset->varinfo[i]->orig_pd = 0;
 5186     }
 5187 }
 5188 
 5189 void *series_info_bundle (const DATASET *dset, int i,
 5190               int *err)
 5191 {
 5192     gretl_bundle *b = NULL;
 5193 
 5194     if (dset != NULL && i >= 0 && i < dset->v) {
 5195     b = gretl_bundle_new();
 5196     if (b == NULL) {
 5197         *err = E_ALLOC;
 5198     }
 5199     } else {
 5200     *err = E_DATA;
 5201     }
 5202 
 5203     if (b != NULL) {
 5204     VARINFO *vinfo = dset->varinfo[i];
 5205 
 5206     gretl_bundle_set_string(b, "name", dset->varname[i]);
 5207     if (vinfo->label != NULL) {
 5208         gretl_bundle_set_string(b, "description", vinfo->label);
 5209     }
 5210     if (vinfo->display_name[0] != '\0') {
 5211         gretl_bundle_set_string(b, "graph_name", vinfo->display_name);
 5212     }
 5213     gretl_bundle_set_int(b, "discrete", vinfo->flags & VAR_DISCRETE ?
 5214                  1 : 0);
 5215     gretl_bundle_set_int(b, "coded", vinfo->flags & VAR_CODED ?
 5216                  1 : 0);
 5217     gretl_bundle_set_string(b, "parent", vinfo->parent);
 5218     if (vinfo->transform > 0) {
 5219         gretl_bundle_set_string(b, "transform",
 5220                     gretl_command_word(vinfo->transform));
 5221     } else {
 5222         gretl_bundle_set_string(b, "transform", "none");
 5223     }
 5224     gretl_bundle_set_int(b, "lag", vinfo->lag);
 5225     gretl_bundle_set_int(b, "has_string_table", vinfo->st != NULL);
 5226     if (vinfo->midas_period > 0) {
 5227         gretl_bundle_set_int(b, "midas_period", vinfo->midas_period);
 5228     }
 5229     if (vinfo->midas_freq > 0) {
 5230         gretl_bundle_set_int(b, "midas_freq", vinfo->midas_freq);
 5231     }
 5232     if (vinfo->orig_pd > 0) {
 5233         gretl_bundle_set_int(b, "orig_pd", vinfo->orig_pd);
 5234     }
 5235     }
 5236 
 5237     return b;
 5238 }
 5239 
 5240 /* Given a series label @s, see if it can be recognized
 5241    as identifying the series as the product of two others,
 5242    and if so write the names of the others into @targ1
 5243    and @targ2.
 5244 */
 5245 
 5246 static int get_interaction_names (const char *s,
 5247                   char *targ1,
 5248                   char *targ2)
 5249 {
 5250     const char *p;
 5251     int n1, n2, ret = 0;
 5252 
 5253     *targ1 = *targ2 = '\0';
 5254 
 5255     p = strchr(s, '*');
 5256     if (p == NULL || strchr(p+1, '*') != NULL) {
 5257     /* the label string does not contain a single '*' */
 5258     return 0;
 5259     }
 5260 
 5261     s += strspn(s, " ");
 5262     n1 = gretl_namechar_spn(s);
 5263     p++;
 5264     p += strspn(p, " ");
 5265     n2 = gretl_namechar_spn(p);
 5266 
 5267     if (n1 > 0 && n1 < VNAMELEN &&
 5268     n2 > 0 && n2 < VNAMELEN) {
 5269     strncat(targ1, s, n1);
 5270     strncat(targ2, p, n2);
 5271     ret = 1;
 5272     }
 5273 
 5274     return ret;
 5275 }
 5276 
 5277 /* Given a series label @s, see if it can be recognized as
 5278    identifying the series as the square of another, and if
 5279    so write the name of the other into @targ.
 5280 */
 5281 
 5282 static int get_square_parent_name (const char *s, char *targ,
 5283                    char *targ2)
 5284 {
 5285     const char *p;
 5286     int n1, n2, ret = 0;
 5287 
 5288     *targ = '\0';
 5289 
 5290     if (*s == '=' && (p = strstr(s, "squared")) != NULL) {
 5291     /* "= PARENT squared" */
 5292     s++;
 5293     s += strspn(s, " ");
 5294     n1 = gretl_namechar_spn(s);
 5295     n2 = p - s - 1;
 5296     if (n1 > 0 && n1 < VNAMELEN && n2 == n1) {
 5297         strncat(targ, s, n1);
 5298         ret = 1;
 5299     }
 5300     } else if (strchr(s, '^') != NULL) {
 5301     /* "PARENT^2" */
 5302     n1 = gretl_namechar_spn(s);
 5303     if (n1 > 0 && n1 < VNAMELEN) {
 5304         p = s + n1;
 5305         if (!strcmp(p, "^2")) {
 5306         strncat(targ, s, n1);
 5307         ret = 1;
 5308         }
 5309     }
 5310     } else if ((p = strstr(s, "square of ")) != NULL) {
 5311     p += 9;
 5312     p += strspn(p, " ");
 5313     n1 = gretl_namechar_spn(p);
 5314     if (n1 > 0 && n1 < VNAMELEN) {
 5315         strncat(targ, p, n1);
 5316         ret = 1;
 5317     }
 5318     } else if (get_interaction_names(s, targ, targ2)) {
 5319     /* "x * x" ? */
 5320     if (!strcmp(targ, targ2)) {
 5321         ret = 1;
 5322     }
 5323     }
 5324 
 5325     return ret;
 5326 }
 5327 
 5328 /* Given either (a) two series identified by ID numbers
 5329    i, j where the second is supposed to be the square
 5330    of the first, or (b) three series i, j, k where the
 5331    third is supposed to be the product of the first two,
 5332    check that the putative relationship actually holds
 5333    over the current sample range. Return 1 if so, else 0.
 5334 */
 5335 
 5336 static int validate_relationship (int i, int j, int k,
 5337                   const DATASET *dset)
 5338 {
 5339     double xi, xj;
 5340     int t;
 5341 
 5342     for (t=dset->t1; t<=dset->t2; t++) {
 5343     xi = dset->Z[i][t];
 5344     xj = dset->Z[j][t];
 5345     if (k > 0) {
 5346         /* interaction test: xk = xi*xj */
 5347         if (!na(xi) && !na(xj) && dset->Z[k][t] != xi*xj) {
 5348         return 0;
 5349         }
 5350     } else {
 5351         /* square test: xj = xi*xi */
 5352         if (!na(xi) && xj != xi*xi) {
 5353         return 0;
 5354         }
 5355     }
 5356     }
 5357 
 5358     return 1;
 5359 }
 5360 
 5361 /* In case we find more interaction terms that can be fitted into
 5362    the current column-size of the "list info" matrix, add two more
 5363    (since the encoding of each interaction for a given "primary"
 5364    series requires two columns).
 5365 */
 5366 
 5367 static int resize_listinfo_matrix (gretl_matrix *m)
 5368 {
 5369     int newc = m->cols + 2;
 5370     int i, err = 0;
 5371 
 5372     err = gretl_matrix_realloc(m, m->rows, newc);
 5373     if (!err) {
 5374     for (i=0; i<m->rows; i++) {
 5375         gretl_matrix_set(m, i, newc-2, 0);
 5376         gretl_matrix_set(m, i, newc-1, 0);
 5377     }
 5378     }
 5379 
 5380     return err;
 5381 }
 5382 
 5383 static int get_iact_column (gretl_matrix *m, int i, int *err)
 5384 {
 5385     int j;
 5386 
 5387     for (j=3; j<m->cols; j+=2) {
 5388     if (gretl_matrix_get(m, i, j) == 0) {
 5389         return j;
 5390     }
 5391     }
 5392 
 5393     /* looks like we need more columns */
 5394     *err = resize_listinfo_matrix(m);
 5395     return *err ? -1 : m->cols - 2;
 5396 }
 5397 
 5398 /* The (optionally) "condensed" version of the listinfo_matrix
 5399    includes only primary terms (and excludes the constant).
 5400    The first column of the full matrix is replaced by the
 5401    position in @list of each primary term.
 5402 */
 5403 
 5404 static int condense_listinfo_matrix (gretl_matrix *m,
 5405                      const int *list,
 5406                      const DATASET *dset)
 5407 {
 5408     gretl_matrix *mc = NULL;
 5409     char **S = NULL;
 5410     double x;
 5411     int i, j, ic, n = 0;
 5412 
 5413     for (i=0; i<m->rows; i++) {
 5414     if (m->val[i] == 1) {
 5415         n++;
 5416     }
 5417     }
 5418 
 5419     if (n == m->rows) {
 5420     /* nothing to be done */
 5421     return 0;
 5422     }
 5423 
 5424     mc = gretl_matrix_alloc(n, m->cols);
 5425     if (mc == NULL) {
 5426     return E_ALLOC;
 5427     }
 5428 
 5429     S = strings_array_new(n);
 5430 
 5431     ic = 0;
 5432     for (i=0; i<m->rows; i++) {
 5433     if (m->val[i] == 1) {
 5434         gretl_matrix_set(mc, ic, 0, i+1);
 5435         for (j=1; j<m->cols; j++) {
 5436         x = gretl_matrix_get(m, i, j);
 5437         gretl_matrix_set(mc, ic, j, x);
 5438         }
 5439         S[ic] = gretl_strdup(dset->varname[list[i+1]]);
 5440         ic++;
 5441     }
 5442     }
 5443 
 5444     gretl_matrix_reuse(m, n, m->cols);
 5445     gretl_matrix_copy_values(m, mc);
 5446     gretl_matrix_free(mc);
 5447     gretl_matrix_set_rownames(m, S);
 5448 
 5449     return 0;
 5450 }
 5451 
 5452 static gretl_matrix *
 5453 linfo_matrix_via_labels (const int *list,
 5454