"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020b/lib/src/dbread.c" (29 Mar 2020, 112461 Bytes) of package /linux/misc/gretl-2020b.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "dbread.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2020a_vs_2020b.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 /* dbread.c for gretl */
   21 
   22 #include "libgretl.h"
   23 #include "swap_bytes.h"
   24 #include "libset.h"
   25 #include "uservar.h"
   26 #include "matrix_extra.h"
   27 #include "usermat.h"
   28 #include "dbread.h"
   29 #include "gretl_midas.h"
   30 #include "gretl_typemap.h"
   31 #ifdef USE_CURL
   32 # include "gretl_www.h"
   33 #endif
   34 
   35 #include <glib.h>
   36 #include <unistd.h>
   37 #include <errno.h>
   38 
   39 #if G_BYTE_ORDER == G_BIG_ENDIAN
   40 # include <netinet/in.h>
   41 #endif
   42 
   43 /**
   44  * SECTION:dbread
   45  * @short_description: reading from databases
   46  * @title: DB read
   47  * @include: gretl/libgretl.h, gretl/dbread.h
   48  *
   49  * Functions that read data from native gretl databases as
   50  * well as RATS 4.0 and PcGive databases. As you will see,
   51  * this area is mostly undocumented at present, but since it
   52  * may ultimately be useful for third-party coders we will
   53  * try to remedy this!
   54  */
   55 
   56 #define DB_DEBUG 0
   57 
   58 #define RECNUM gint32
   59 #define NAMELENGTH 16
   60 #define RATSCOMMENTLENGTH 80
   61 #define RATSCOMMENTS 2
   62 #define RATS_PARSE_ERROR -999
   63 
   64 typedef struct {
   65     gint32 daynumber;              /* Number of days from 1-1-90
   66                       to year, month, day */
   67     short panel;                   /* 1 for panel set, 2 for intraday
   68                       date set , 0 o.w. */
   69 #define LINEAR    0                /* Single time direction */
   70 #define RATSPANEL 1                /* panel:period */
   71 #define INTRADAY  2                /* date:intraday period */
   72     gint32 panelrecord;            /* Size of panel or
   73                       number of periods per day */
   74     short dclass;                  /* See definitions below */
   75 #define UNDATEDCLASS   0           /* No time series properties */
   76 #define IRREGULARCLASS 1           /* Time series (irregular) */
   77 #define PERYEARCLASS   2           /* x periods / year */
   78 #define PERWEEKCLASS   3           /* x periods / week */
   79 #define DAILYCLASS     4           /* x days / period */
   80     gint32 info;                   /* Number of periods per year or
   81                       per week */
   82     short digits;                  /* Digits for representing panel
   83                       or intraday period */
   84     short year,month,day;          /* Starting year, month, day */
   85 } DATEINFO;
   86 
   87 typedef struct {
   88     RECNUM back_point;             /* Pointer to previous series */
   89     RECNUM forward_point;          /* Pointer to next series */
   90     short back_class;              /* Reserved.  Should be 0 */
   91     short forward_class;           /* Reserved.  Should be 0 */
   92     RECNUM first_data;             /* First data record */
   93     char series_name[NAMELENGTH];  /* Series name */
   94     DATEINFO date_info;            /* Dating scheme for this series */
   95     gint32 datapoints;             /* Number of data points */
   96     short data_type;               /* real, char, complex.
   97                                       Reserved.  Should be 0 */
   98     short digits;                  /* . + digit count for representation
   99                       (0 = unspecified) */
  100     short misc1;                   /* For future expansion should be 0 */
  101     short misc2;
  102     short comment_lines;           /* Number of comment lines (0,1,2) */
  103     char series_class[NAMELENGTH]; /* Series class. Not used, blank */
  104     char comments[RATSCOMMENTS][RATSCOMMENTLENGTH];
  105     char pad[10];
  106 } RATSDirect;
  107 
  108 typedef struct {
  109     RECNUM back_point;             /* Previous record (0 for first) */
  110     RECNUM forward_point;          /* Next record (0 for last) */
  111     double data[31];               /* Data */
  112 } RATSData;
  113 
  114 static char saved_db_name[MAXLEN];
  115 static int saved_db_type;
  116 
  117 #if G_BYTE_ORDER == G_BIG_ENDIAN
  118 float retrieve_float (netfloat nf)
  119 {
  120     short exp = ntohs(nf.exp);
  121     long frac = ntohl(nf.frac);
  122     double receive = frac / 10e6;
  123 
  124     return ldexp(receive, exp);
  125 }
  126 #endif
  127 
  128 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
  129                 DATASET *dset, CompactMethod cmethod,
  130                 int interpolate, int dbv, PRN *prn);
  131 
  132 static int do_compact_spread (DATASET *dset, int newpd);
  133 
  134 static FILE *open_binfile (const char *dbbase, int code, int offset, int *err)
  135 {
  136     char dbbin[MAXLEN];
  137     FILE *fp = NULL;
  138 
  139     strcpy(dbbin, dbbase);
  140     if (code == GRETL_NATIVE_DB) {
  141     if (strstr(dbbin, ".bin") == NULL) {
  142         strcat(dbbin, ".bin");
  143     }
  144     } else {
  145     if (strstr(dbbin, ".bn7") == NULL) {
  146         strcat(dbbin, ".bn7");
  147     }
  148     }
  149 
  150     fp = gretl_fopen(dbbin, "rb");
  151 
  152     if (fp == NULL) {
  153     *err = E_FOPEN;
  154     } else if (fseek(fp, (long) offset, SEEK_SET)) {
  155     *err = DB_PARSE_ERROR;
  156     fclose(fp);
  157     fp = NULL;
  158     }
  159 
  160     return fp;
  161 }
  162 
  163 /**
  164  * get_native_db_data:
  165  * @dbbase:
  166  * @sinfo:
  167  * @Z: data array.
  168  *
  169  * Returns: 0 on success, non-zero code on failure.
  170  */
  171 
  172 int get_native_db_data (const char *dbbase, SERIESINFO *sinfo,
  173             double **Z)
  174 {
  175     char numstr[32];
  176     FILE *fp;
  177     dbnumber x;
  178     int v = sinfo->v;
  179     int t, t2, err = 0;
  180 
  181     fp = open_binfile(dbbase, GRETL_NATIVE_DB, sinfo->offset, &err);
  182     if (err) {
  183     return err;
  184     }
  185 
  186     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
  187 
  188     for (t=sinfo->t1; t<=t2 && !err; t++) {
  189     if (fread(&x, sizeof x, 1, fp) != 1) {
  190         err = DB_PARSE_ERROR;
  191     } else {
  192         sprintf(numstr, "%.7g", (double) x); /* N.B. converting a float */
  193         Z[v][t] = atof(numstr);
  194         if (Z[v][t] == DBNA) {
  195         Z[v][t] = NADBL;
  196         }
  197     }
  198     }
  199 
  200     fclose(fp);
  201 
  202     return err;
  203 }
  204 
  205 #ifdef USE_CURL
  206 
  207 /**
  208  * get_remote_db_data:
  209  * @dbbase:
  210  * @sinfo:
  211  * @Z: data array.
  212  *
  213  * Returns: 0 on success, non-zero code on failure.
  214  */
  215 
  216 int get_remote_db_data (const char *dbbase, SERIESINFO *sinfo,
  217             double **Z)
  218 {
  219     char *getbuf = NULL;
  220     int t, t2, err;
  221     int v = sinfo->v;
  222     dbnumber x;
  223     size_t offset;
  224 #if G_BYTE_ORDER == G_BIG_ENDIAN
  225     netfloat nf;
  226 #endif
  227 
  228 #if G_BYTE_ORDER == G_BIG_ENDIAN
  229     err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
  230                   GRAB_NBO_DATA);
  231 #else
  232     err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
  233                   GRAB_DATA);
  234 #endif
  235 
  236     if (err) {
  237     free(getbuf);
  238     return E_FOPEN;
  239     }
  240 
  241     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
  242 
  243     offset = 0L;
  244     for (t=sinfo->t1; t<=t2; t++) {
  245 #if G_BYTE_ORDER == G_BIG_ENDIAN
  246     /* go via network byte order */
  247     memcpy(&(nf.frac), getbuf + offset, sizeof nf.frac);
  248     offset += sizeof nf.frac;
  249     memcpy(&(nf.exp), getbuf + offset, sizeof nf.exp);
  250     offset += sizeof nf.exp;
  251     x = retrieve_float(nf);
  252 #else
  253     /* just read floats */
  254     memcpy(&x, getbuf + offset, sizeof x);
  255     offset += sizeof x;
  256 #endif
  257     Z[v][t] = (x == DBNA)? NADBL : x;
  258     }
  259 
  260     free(getbuf);
  261 
  262     return 0;
  263 }
  264 
  265 #endif /* USE_CURL */
  266 
  267 /**
  268  * get_pcgive_db_data:
  269  * @dbbase:
  270  * @sinfo:
  271  * @Z: data array.
  272  *
  273  *
  274  * Returns: 0 on success, non-zero code on failure.
  275  */
  276 
  277 int get_pcgive_db_data (const char *dbbase, SERIESINFO *sinfo,
  278             double **Z)
  279 {
  280     FILE *fp;
  281     double x;
  282     int v = sinfo->v;
  283     int t, t2, err = 0;
  284 
  285     fp = open_binfile(dbbase, GRETL_PCGIVE_DB, sinfo->offset, &err);
  286     if (err) {
  287     return err;
  288     }
  289 
  290     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
  291 
  292     for (t=sinfo->t1; t<=t2; t++) {
  293     if (fread(&x, sizeof x, 1, fp) != 1) {
  294         err = E_DATA;
  295         break;
  296     }
  297 #if G_BYTE_ORDER == G_BIG_ENDIAN
  298     reverse_double(x);
  299 #endif
  300     if (x == -9999.99 || isnan(x)) {
  301         Z[v][t] = NADBL;
  302         err = DB_MISSING_DATA;
  303     } else {
  304         Z[v][t] = x;
  305     }
  306     }
  307 
  308     fclose(fp);
  309 
  310     return err;
  311 }
  312 
  313 static void get_native_series_comment (SERIESINFO *sinfo, const char *s)
  314 {
  315     s += strcspn(s, " "); /* skip varname */
  316     s += strspn(s, " ");  /* skip space */
  317 
  318     series_info_set_description(sinfo, s);
  319     tailstrip(sinfo->descrip);
  320 }
  321 
  322 static int get_native_series_pd (SERIESINFO *sinfo, char pdc)
  323 {
  324     sinfo->pd = 1;
  325     sinfo->undated = 0;
  326 
  327     if (pdc == 'M') sinfo->pd = 12;
  328     else if (pdc == 'Q') sinfo->pd = 4;
  329     else if (pdc == 'B') sinfo->pd = 5;
  330     else if (pdc == 'S') sinfo->pd = 6;
  331     else if (pdc == 'D') sinfo->pd = 7;
  332     else if (pdc == 'U') sinfo->undated = 1;
  333     else return 1;
  334 
  335     return 0;
  336 }
  337 
  338 static int get_native_series_obs (SERIESINFO *sinfo,
  339                   const char *stobs,
  340                   const char *endobs)
  341 {
  342     char dc = 0;
  343 
  344     if (strchr(stobs, '-')) {
  345     dc = '-';
  346     } else if (strchr(stobs, '/')) {
  347     dc = '/';
  348     }
  349 
  350     if (dc != 0) {
  351     /* calendar data */
  352     const char *q = stobs;
  353     const char *p = strchr(stobs, dc);
  354 
  355     if (p - q == 4) {
  356         strcpy(sinfo->stobs, q);
  357     }
  358     q = endobs;
  359     p = strchr(endobs, dc);
  360     if (p && p - q == 4) {
  361         strcpy(sinfo->endobs, q);
  362     }
  363     } else {
  364     *sinfo->stobs = '\0';
  365     *sinfo->endobs = '\0';
  366     strncat(sinfo->stobs, stobs, OBSLEN-1);
  367     strncat(sinfo->endobs, endobs, OBSLEN-1);
  368     }
  369 
  370     return 0;
  371 }
  372 
  373 static int
  374 open_native_db_files (const char *dname, FILE **f1, char *name1,
  375               FILE **f2, char *name2)
  376 {
  377     char dbbase[FILENAME_MAX];
  378     char fname[FILENAME_MAX];
  379     FILE *fidx = NULL, *fbin = NULL;
  380     int err = 0;
  381 
  382     if (dname != NULL) {
  383     strcpy(dbbase, dname);
  384     } else {
  385     strcpy(dbbase, saved_db_name);
  386     }
  387 
  388     if (has_suffix(dbbase, ".bin")) {
  389     dbbase[strlen(dbbase) - 4] = '\0';
  390     }
  391 
  392     if (f1 != NULL) {
  393     strcpy(fname, dbbase);
  394     strcat(fname, ".idx");
  395 
  396     if (name1 != NULL) {
  397         err = gretl_write_access(fname);
  398         if (!err) {
  399         strcpy(name1, fname);
  400         }
  401     }
  402 
  403     if (!err) {
  404         fidx = gretl_fopen(fname, "r");
  405         if (fidx == NULL) {
  406         gretl_errmsg_set(_("Couldn't open database index file"));
  407         err = E_FOPEN;
  408         }
  409     }
  410     }
  411 
  412     if (f2 != NULL && !err) {
  413     strcpy(fname, dbbase);
  414     strcat(fname, ".bin");
  415 
  416     if (name2 != NULL) {
  417         err = gretl_write_access(fname);
  418         if (!err) {
  419         strcpy(name2, fname);
  420         }
  421     }
  422 
  423     if (!err) {
  424         fbin = gretl_fopen(fname, "rb");
  425         if (fbin == NULL) {
  426         gretl_errmsg_set(_("Couldn't open database binary file"));
  427         err = E_FOPEN;
  428         }
  429     }
  430     }
  431 
  432     if (err) {
  433     if (fidx != NULL) {
  434         fclose(fidx);
  435     }
  436     } else {
  437     if (f1 != NULL) {
  438         *f1 = fidx;
  439     }
  440     if (f2 != NULL) {
  441         *f2 = fbin;
  442     }
  443     }
  444 
  445     return err;
  446 }
  447 
  448 static char *native_db_index_name (void)
  449 {
  450     char *fname;
  451 
  452     if (has_suffix(saved_db_name, ".bin")) {
  453     fname = g_strdup(saved_db_name);
  454     strcpy(fname + strlen(fname) - 3, "idx");
  455     } else {
  456     fname = g_strdup_printf("%s.idx", saved_db_name);
  457     }
  458 
  459     return fname;
  460 }
  461 
  462 static int db_match_glob (FILE *fp,
  463               char *line, int linelen,
  464               GPatternSpec *pspec,
  465               char **S, int *err)
  466 {
  467     char vname[VNAMELEN], l2[72];
  468     int n = 0;
  469 
  470     while (fgets(line, linelen, fp) && !*err) {
  471     if (*line == '#') {
  472         continue;
  473     }
  474     if (gretl_scan_varname(line, vname) != 1) {
  475         break;
  476     }
  477     if (g_pattern_match_string(pspec, vname)) {
  478         if (S != NULL) {
  479         S[n] = gretl_strdup(vname);
  480         }
  481         n++;
  482     }
  483     if (fgets(l2, sizeof l2, fp) == NULL) {
  484         *err = DB_PARSE_ERROR;
  485     }
  486     }
  487 
  488     return n;
  489 }
  490 
  491 static char **native_db_match_series (const char *glob, int *nmatch,
  492                       const char *idxname, int *err)
  493 {
  494     GPatternSpec *pspec;
  495     char **S = NULL;
  496     FILE *fp = NULL;
  497     char line[256];
  498 
  499     fp = gretl_fopen(idxname, "rb");
  500     if (fp == NULL) {
  501     *err = E_FOPEN;
  502     *nmatch = 0;
  503     return NULL;
  504     }
  505 
  506     pspec = g_pattern_spec_new(glob);
  507 
  508     *nmatch = db_match_glob(fp, line, sizeof line, pspec, NULL, err);
  509 
  510     if (!*err && *nmatch > 0) {
  511     S = strings_array_new(*nmatch);
  512     if (S == NULL) {
  513         *nmatch = 0;
  514         *err = E_ALLOC;
  515     } else {
  516         rewind(fp);
  517         db_match_glob(fp, line, sizeof line, pspec, S, err);
  518     }
  519     }
  520 
  521     g_pattern_spec_free(pspec);
  522     fclose(fp);
  523 
  524     return S;
  525 }
  526 
  527 static int get_native_series_info (const char *series,
  528                    SERIESINFO *sinfo,
  529                    const char *idxname)
  530 {
  531     FILE *fp = NULL;
  532     char sername[VNAMELEN];
  533     /* 2019-01-08: enlarge @s1 from 256 to 1024 */
  534     char s1[1024], s2[72];
  535     char stobs[OBSLEN], endobs[OBSLEN];
  536     char pdc;
  537     int offset = 0;
  538     int gotit = 0, err = 0;
  539     int n;
  540 
  541     fp = gretl_fopen(idxname, "rb");
  542     if (fp == NULL) {
  543     return E_FOPEN;
  544     }
  545 
  546     while (fgets(s1, sizeof s1, fp) && !gotit) {
  547     if (*s1 == '#') {
  548         continue;
  549     }
  550     if (gretl_scan_varname(s1, sername) != 1) {
  551         break;
  552     }
  553     if (!strcmp(series, sername)) {
  554         gotit = 1;
  555         strcpy(sinfo->varname, sername);
  556     }
  557     if (fgets(s2, sizeof s2, fp) == NULL) {
  558         err = DB_PARSE_ERROR;
  559         break;
  560     }
  561     if (gotit) {
  562         get_native_series_comment(sinfo, s1);
  563         if (sscanf(s2, "%c %10s %*s %10s %*s %*s %d",
  564                &pdc, stobs, endobs, &sinfo->nobs) != 4) {
  565         gretl_errmsg_set(_("Failed to parse series information"));
  566         err = DB_PARSE_ERROR;
  567         } else {
  568         get_native_series_pd(sinfo, pdc);
  569         get_native_series_obs(sinfo, stobs, endobs);
  570         sinfo->offset = offset;
  571         sinfo->t2 = sinfo->nobs - 1;
  572         }
  573     } else {
  574         if (sscanf(s2, "%*c %*s %*s %*s %*s %*s %d", &n) != 1) {
  575         gretl_errmsg_set(_("Failed to parse series information"));
  576         err = DB_PARSE_ERROR;
  577         } else {
  578         offset += n * sizeof(dbnumber);
  579         }
  580     }
  581     }
  582 
  583     fclose(fp);
  584 
  585     if (!err && !gotit) {
  586     gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
  587     err = DB_NO_SUCH_SERIES;
  588     }
  589 
  590     return err;
  591 }
  592 
  593 #ifdef USE_CURL
  594 
  595 static int remote_db_index_to_file (const char *fname)
  596 {
  597     char *buf = NULL;
  598     int err;
  599 
  600     err = retrieve_remote_db_index(saved_db_name, &buf);
  601 
  602     if (!err) {
  603     FILE *fp = gretl_fopen(fname, "wb");
  604 
  605     if (fp == NULL) {
  606         err = E_FOPEN;
  607     } else {
  608         fputs(buf, fp);
  609         fclose(fp);
  610 #if 1
  611         fprintf(stderr, "remote db index saved\n");
  612 #endif
  613     }
  614     free(buf);
  615     }
  616 
  617     return err;
  618 }
  619 
  620 #endif /* USE_CURL */
  621 
  622 static int in7_get_obs (int y0, int p0, int y1, int p1,
  623             SERIESINFO *sinfo)
  624 {
  625     int pd = sinfo->pd;
  626     int n = (y1 - y0 + 1) * pd - (p0 - 1) - (pd - p1);
  627     int err = 0;
  628 
  629     if (n <= 0) {
  630     err = 1;
  631     } else {
  632     sinfo->nobs = n;
  633     sinfo->t2 = n - 1;
  634     }
  635 
  636     return err;
  637 }
  638 
  639 static int
  640 pcgive_set_stobs_endobs (int y0, int p0, int y1, int p1,
  641              SERIESINFO *sinfo)
  642 {
  643     int err = 0;
  644 
  645     if (sinfo->pd == 1) {
  646     sprintf(sinfo->stobs, "%d", y0);
  647     sprintf(sinfo->endobs, "%d", y1);
  648     if (y0 == 1) {
  649         sinfo->undated = 1;
  650     }
  651     } else if (sinfo->pd == 4) {
  652     sprintf(sinfo->stobs, "%d:%d", y0, p0);
  653     sprintf(sinfo->endobs, "%d:%d", y1, p1);
  654     } else if (sinfo->pd == 12 || sinfo->pd == 52) {
  655     sprintf(sinfo->stobs, "%d:%02d", y0, p0);
  656     sprintf(sinfo->endobs, "%d:%02d", y1, p1);
  657     } else {
  658     err = E_DATA; /* FIXME? */
  659     }
  660 
  661     return err;
  662 }
  663 
  664 static int
  665 get_pcgive_series_info (const char *series, SERIESINFO *sinfo)
  666 {
  667     FILE *fp;
  668     char dbidx[MAXLEN];
  669     char line[1024];
  670     char fmt[24];
  671     char *p;
  672     int y0, p0, y1, p1;
  673     int nf, gotit = 0;
  674     int err = 0;
  675 
  676     strcpy(dbidx, saved_db_name);
  677     p = strstr(dbidx, ".bn7");
  678     if (p != NULL) {
  679     strcpy(p, ".in7");
  680     } else {
  681     strcat(dbidx, ".in7");
  682     }
  683 
  684 #if DB_DEBUG
  685     fprintf(stderr, "get_pcgive_series_info: dbidx = '%s'\n", dbidx);
  686 #endif
  687 
  688     fp = gretl_fopen(dbidx, "r");
  689     if (fp == NULL) {
  690     gretl_errmsg_set(_("Couldn't open database index file"));
  691     return E_FOPEN;
  692     }
  693 
  694     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
  695 
  696     while (fgets(line, sizeof line, fp) && !gotit) {
  697     if (*line == '>') {
  698         *sinfo->varname = 0;
  699         nf = sscanf(line + 1, fmt, sinfo->varname, &y0, &p0,
  700             &y1, &p1, &sinfo->pd, &sinfo->offset);
  701         fprintf(stderr, "in7: varname='%s'\n", sinfo->varname);
  702         if (!strcmp(sinfo->varname, series)) {
  703         gotit = 1;
  704         } else {
  705         continue;
  706         }
  707         if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
  708         sinfo->pd >= 1 && sinfo->offset > 0) {
  709         while (fgets(line, sizeof line, fp)) {
  710             if (*line == ';') {
  711             gretl_strstrip(line);
  712             series_info_set_description(sinfo, line + 1);
  713             } else {
  714             break;
  715             }
  716         }
  717         /* transcribe info */
  718         err = in7_get_obs(y0, p0, y1, p1, sinfo);
  719         if (!err) {
  720             err = pcgive_set_stobs_endobs(y0, p0, y1, p1, sinfo);
  721         }
  722         } else {
  723         err = E_DATA;
  724         }
  725     }
  726     }
  727 
  728     fclose(fp);
  729 
  730     if (!err && !gotit) {
  731     gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
  732     err = DB_NO_SUCH_SERIES;
  733     }
  734 
  735     return err;
  736 }
  737 
  738 /* Figure the ending observation date of a series */
  739 
  740 static int get_endobs (char *datestr, int startyr, int startfrac,
  741                int pd, int n)
  742 {
  743     int endyr, endfrac;
  744 
  745     endyr = startyr + n / pd;
  746     endfrac = startfrac - 1 + n % pd;
  747 
  748     if (endfrac >= pd) {
  749     endyr++;
  750     endfrac -= pd;
  751     }
  752 
  753     if (endfrac == 0) {
  754     endyr--;
  755     endfrac = pd;
  756     }
  757 
  758     if (pd == 1) {
  759     sprintf(datestr, "%d", endyr);
  760     } else if (pd == 4) {
  761     sprintf(datestr, "%d.%d", endyr, endfrac);
  762     } else if (pd == 12 || pd == 52) {
  763     sprintf(datestr, "%d.%02d", endyr, endfrac);
  764     }
  765 
  766     return 0;
  767 }
  768 
  769 static int dinfo_sanity_check (const DATEINFO *dinfo)
  770 {
  771     int err = 0;
  772 
  773     if (dinfo->info < 0 || dinfo->info > 365) {
  774     err = 1;
  775     } else if (dinfo->day < 0 || dinfo->day > 365) {
  776     err = 1;
  777     } else if (dinfo->year < 0 || dinfo->year > 3000) {
  778     err = 1;
  779     } else if (dinfo->info == 52) {
  780     /* note: "month" = week */
  781     if (dinfo->month < 0 || dinfo->month > 52) {
  782         err = 1;
  783     }
  784     } else {
  785     /* annual, quarterly, monthly */
  786     if (dinfo->month < 0 || dinfo->month > 12) {
  787         err = 1;
  788     }
  789     }
  790 
  791     if (err) {
  792     gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
  793     fprintf(stderr, "rats database: failed dinfo_sanity_check:\n"
  794         " info=%d, year=%d, month=%d, day=%d\n",
  795         (int) dinfo->info, (int) dinfo->year, (int) dinfo->month,
  796         (int) dinfo->day);
  797     }
  798 
  799     return err;
  800 }
  801 
  802 static int dinfo_to_sinfo (const DATEINFO *dinfo, SERIESINFO *sinfo,
  803                const char *varname, const char *comment,
  804                int n, int offset)
  805 {
  806     int startfrac = 0;
  807     char pdstr[8] = {0};
  808     int err = 0;
  809 
  810     if (dinfo_sanity_check(dinfo)) {
  811     return 1;
  812     }
  813 
  814     sprintf(sinfo->stobs, "%d", dinfo->year);
  815 
  816     if (dinfo->info == 4) {
  817     sprintf(pdstr, ".%d", dinfo->month);
  818     if (dinfo->month == 1) {
  819         startfrac = 1;
  820     } else if (dinfo->month > 1 && dinfo->month <= 4) {
  821         startfrac = 2;
  822     } else if (dinfo->month > 4 && dinfo->month <= 7) {
  823         startfrac = 3;
  824     } else {
  825         startfrac = 4;
  826     }
  827     } else if (dinfo->info == 12 || dinfo->info == 52) {
  828     sprintf(pdstr, ".%02d", dinfo->month);
  829     startfrac = dinfo->month;
  830     } else if (dinfo->info == 1) {
  831     startfrac = 0;
  832     } else {
  833     fprintf(stderr, I_("frequency (%d) does not make seem to make sense"),
  834         (int) dinfo->info);
  835     fputc('\n', stderr);
  836     gretl_errmsg_sprintf(("frequency (%d) does not make seem to make sense"),
  837                  (int) dinfo->info);
  838     err = 1;
  839     }
  840 
  841     if (*pdstr) {
  842     strcat(sinfo->stobs, pdstr);
  843     }
  844 
  845     get_endobs(sinfo->endobs, dinfo->year, startfrac, dinfo->info, n);
  846 
  847     sinfo->pd = dinfo->info;
  848     sinfo->nobs = n;
  849     sinfo->t2 = n - 1;
  850     sinfo->offset = offset;
  851 
  852     strncat(sinfo->varname, varname, VNAMELEN - 1);
  853     series_info_set_description(sinfo, comment);
  854 
  855 #if DB_DEBUG
  856     fprintf(stderr, "dinfo_to_sinfo: '%s': set sinfo->offset = %d\n", varname,
  857         (int) offset);
  858 #endif
  859 
  860     return err;
  861 }
  862 
  863 static int in7_to_sinfo (const char *varname, const char *comment,
  864              int y0, int p0, int y1, int p1, int pd,
  865              int offset, SERIESINFO *sinfo)
  866 {
  867     int err = 0;
  868 
  869     if (pd == 4) {
  870     sprintf(sinfo->stobs, "%d.%d", y0, p0);
  871     sprintf(sinfo->endobs, "%d.%d", y1, p1);
  872     } else if (pd == 12 || pd == 52) {
  873     sprintf(sinfo->stobs, "%d.%02d", y0, p0);
  874     sprintf(sinfo->endobs, "%d.%02d", y1, p1);
  875     } else if (pd == 1) {
  876     sprintf(sinfo->stobs, "%d", y0);
  877     sprintf(sinfo->endobs, "%d", y1);
  878     } else {
  879     fprintf(stderr, I_("frequency %d is not supported"), pd);
  880     fputc('\n', stderr);
  881     gretl_errmsg_sprintf(_("frequency %d is not supported"), pd);
  882     err = 1;
  883     }
  884 
  885     if (!err) {
  886     sinfo->pd = pd;
  887     err = in7_get_obs(y0, p0, y1, p1, sinfo);
  888     }
  889 
  890     if (!err) {
  891     strcpy(sinfo->varname, varname);
  892     if (comment != NULL && *comment != 0) {
  893         series_info_set_description(sinfo, comment);
  894     }
  895     sinfo->pd = pd;
  896     sinfo->offset = offset;
  897     }
  898 
  899     return err;
  900 }
  901 
  902 static RECNUM read_rats_directory (FILE *fp, const char *series_name,
  903                    SERIESINFO *sinfo)
  904 {
  905     RATSDirect rdir;
  906     DATEINFO dinfo;
  907     RECNUM ret;
  908     int nread;
  909     int i, err = 0;
  910 
  911     memset(rdir.series_name, 0, NAMELENGTH);
  912 
  913     if (fread(&rdir.back_point, sizeof(RECNUM), 1, fp) != 1) {
  914     err = 1;
  915     } else if (fread(&rdir.forward_point, sizeof(RECNUM), 1, fp) != 1) {
  916     err = 1;
  917     }
  918     if (!err) {
  919     fseek(fp, 4L, SEEK_CUR); /* skip two shorts */
  920     if (fread(&rdir.first_data, sizeof(RECNUM), 1, fp) != 1) {
  921         err = 1;
  922     } else if (fread(rdir.series_name, NAMELENGTH, 1, fp) != 1) {
  923         err = 1;
  924     }
  925     }
  926 
  927     if (!err) {
  928     rdir.series_name[NAMELENGTH-1] = '\0';
  929     gretl_strstrip(rdir.series_name);
  930 #if DB_DEBUG
  931     fprintf(stderr, "read_rats_directory: name='%s'\n", rdir.series_name);
  932 #endif
  933     if (!isprint(rdir.series_name[0])) {
  934         err = 1;
  935     }
  936     }
  937 
  938     if (err) {
  939     return RATS_PARSE_ERROR;
  940     }
  941 
  942     if (series_name != NULL && strcmp(series_name, rdir.series_name)) {
  943     /* specific series not found yet: keep going */
  944     return rdir.forward_point;
  945     }
  946 
  947     /* Now the dateinfo: we can't read this in one go either :-( */
  948 
  949     /* skip long, short, long, short */
  950     fseek(fp, 12, SEEK_CUR);
  951     nread = 0;
  952     nread += fread(&dinfo.info, sizeof(gint32), 1, fp);
  953     nread += fread(&dinfo.digits, sizeof(short), 1, fp);
  954     nread += fread(&dinfo.year, sizeof(short), 1, fp);
  955     nread += fread(&dinfo.month, sizeof(short), 1, fp);
  956     nread += fread(&dinfo.day, sizeof(short), 1, fp);
  957     nread += fread(&rdir.datapoints, sizeof(gint32), 1, fp);
  958 
  959     if (nread != 6) {
  960     return RATS_PARSE_ERROR;
  961     }
  962 
  963     fseek(fp, sizeof(short) * 4L, SEEK_CUR);  /* skip 4 shorts */
  964 
  965 #if DB_DEBUG
  966     fprintf(stderr, "info=%d, digits=%d, year=%d, mon=%d, day=%d\n",
  967         (int) dinfo.info, (int) dinfo.digits, (int) dinfo.year,
  968         (int) dinfo.month, (int) dinfo.day);
  969     fprintf(stderr, "datapoints = %d\n", (int) rdir.datapoints);
  970 #endif
  971 
  972     if (fread(&rdir.comment_lines, sizeof(short), 1, fp) != 1) {
  973     err = 1;
  974     } else {
  975     fseek(fp, 1L, SEEK_CUR); /* skip one char */
  976     for (i=0; i<2 && !err; i++) {
  977         if (i < rdir.comment_lines) {
  978         memset(rdir.comments[i], 0, 80);
  979         err = (fread(rdir.comments[i], 80, 1, fp) != 1);
  980         if (!err) {
  981             rdir.comments[i][79] = '\0';
  982             gretl_strstrip(rdir.comments[i]);
  983         }
  984         } else {
  985         rdir.comments[i][0] = 0;
  986         fseek(fp, 80, SEEK_CUR);
  987         }
  988     }
  989     }
  990 
  991 #if DB_DEBUG
  992     if (!err) {
  993     fprintf(stderr, "comment_lines = %d\n", (int) rdir.comment_lines);
  994     fprintf(stderr, "comment[0] = '%s'\n", rdir.comments[0]);
  995     fprintf(stderr, "comment[1] = '%s'\n", rdir.comments[1]);
  996     }
  997 #endif
  998 
  999     if (!err) {
 1000     err = dinfo_to_sinfo(&dinfo, sinfo, rdir.series_name, rdir.comments[0],
 1001                  rdir.datapoints, rdir.first_data);
 1002     }
 1003 
 1004     ret = (err)? RATS_PARSE_ERROR : rdir.forward_point;
 1005 
 1006 #if DB_DEBUG
 1007     fprintf(stderr, "read_rats_directory: err = %d, forward_point=%d, first_data=%d\n",
 1008         err, (int) rdir.forward_point, (int) rdir.first_data);
 1009     fprintf(stderr, "returning %d\n", (int) ret);
 1010 #endif
 1011 
 1012     return ret;
 1013 }
 1014 
 1015 static void series_info_init (SERIESINFO *sinfo)
 1016 {
 1017     sinfo->t1 = sinfo->t2 = 0;
 1018     sinfo->nobs = 0;
 1019     sinfo->v = 1;
 1020     sinfo->pd = 1;
 1021     sinfo->offset = -1;
 1022     sinfo->err = 0;
 1023     sinfo->undated = 0;
 1024 
 1025     sinfo->varname[0] = '\0';
 1026     sinfo->stobs[0] = '\0';
 1027     sinfo->endobs[0] = '\0';
 1028 
 1029     sinfo->descrip = NULL;
 1030     sinfo->data = NULL;
 1031 }
 1032 
 1033 void series_info_set_description (SERIESINFO *sinfo,
 1034                   const char *s)
 1035 {
 1036     if (sinfo->descrip != NULL) {
 1037     free(sinfo->descrip);
 1038     sinfo->descrip = NULL;
 1039     }
 1040     if (s != NULL && *s != '\0') {
 1041     sinfo->descrip = gretl_strdup(s);
 1042     }
 1043 }
 1044 
 1045 static void series_info_clear (SERIESINFO *sinfo)
 1046 {
 1047     free(sinfo->descrip);
 1048 }
 1049 
 1050 #define DB_INIT_ROWS 32
 1051 
 1052 /**
 1053  * dbwrapper_destroy:
 1054  * @dw: database series wrapper.
 1055  *
 1056  * Frees all resources associated with @dw as well as the pointer
 1057  * itself.
 1058  */
 1059 
 1060 void dbwrapper_destroy (dbwrapper *dw)
 1061 {
 1062     if (dw != NULL) {
 1063     free(dw->fname);
 1064     free(dw->sinfo);
 1065     free(dw);
 1066     }
 1067 }
 1068 
 1069 /**
 1070  * dbwrapper_new:
 1071  * @n: initial number of series.
 1072  * @fname: database filename.
 1073  * @dbtype: database type code.
 1074  *
 1075  * Returns: an allocated database series wrapper.
 1076  */
 1077 
 1078 dbwrapper *dbwrapper_new (int n, const char *fname, int dbtype)
 1079 {
 1080     dbwrapper *dw;
 1081     int i;
 1082 
 1083     if (n == 0) {
 1084     n = DB_INIT_ROWS;
 1085     }
 1086 
 1087     dw = malloc(sizeof *dw);
 1088     if (dw == NULL) {
 1089     return NULL;
 1090     }
 1091 
 1092     dw->fname = gretl_strdup(fname);
 1093     dw->dbtype = dbtype;
 1094 
 1095     dw->sinfo = malloc(n * sizeof *dw->sinfo);
 1096     if (dw->sinfo == NULL) {
 1097     free(dw);
 1098     return NULL;
 1099     }
 1100 
 1101     for (i=0; i<n; i++) {
 1102     series_info_init(&dw->sinfo[i]);
 1103     }
 1104 
 1105     dw->nv = 0;
 1106     dw->nalloc = n;
 1107 
 1108     return dw;
 1109 }
 1110 
 1111 static int dbwrapper_expand (dbwrapper *dw)
 1112 {
 1113     SERIESINFO *sinfo;
 1114     int i, newsz;
 1115 
 1116     newsz = (dw->nv / DB_INIT_ROWS) + 1;
 1117     newsz *= DB_INIT_ROWS;
 1118 
 1119     sinfo = realloc(dw->sinfo, newsz * sizeof *sinfo);
 1120     if (sinfo == NULL) {
 1121     free(dw->sinfo);
 1122     dw->sinfo = NULL;
 1123     return 1;
 1124     }
 1125 
 1126     dw->sinfo = sinfo;
 1127 
 1128     for (i=dw->nalloc; i<newsz; i++) {
 1129     series_info_init(&dw->sinfo[i]);
 1130     }
 1131 
 1132     dw->nalloc = newsz;
 1133 
 1134     return 0;
 1135 }
 1136 
 1137 static int read_in7_series_info (FILE *fp, dbwrapper *dw)
 1138 {
 1139     char line[1024];
 1140     char sname[VNAMELEN];
 1141     char desc[MAXLABEL];
 1142     char fmt[24];
 1143     int y0, p0, y1, p1;
 1144     int pd, offset, pos;
 1145     int i, nf;
 1146     int err = 0;
 1147 
 1148     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
 1149 
 1150     i = 0;
 1151     while (fgets(line, sizeof line, fp) && !err) {
 1152     if (*line == '>') {
 1153         nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
 1154             &p1, &pd, &offset);
 1155         if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
 1156         pd >= 1 && offset > 0) {
 1157         *desc = 0;
 1158         pos = ftell(fp);
 1159         while (fgets(line, sizeof line, fp)) {
 1160             if (*line == ';') {
 1161             /* following series description */
 1162             int rem = MAXLABEL - strlen(desc) - 1;
 1163 
 1164             if (rem > 0) {
 1165                 gretl_strstrip(line);
 1166                 strncat(desc, line + 1, rem);
 1167             }
 1168             pos = ftell(fp);
 1169             } else {
 1170             /* not a description: throw the line back */
 1171             fseek(fp, pos, SEEK_SET);
 1172             break;
 1173             }
 1174         }
 1175         /* record info */
 1176         err = in7_to_sinfo(sname, desc, y0, p0, y1, p1,
 1177                    pd, offset, &dw->sinfo[i++]);
 1178         if (!err) {
 1179             dw->nv += 1;
 1180         }
 1181         }
 1182     }
 1183     }
 1184 
 1185     return err;
 1186 }
 1187 
 1188 static int count_in7_series (FILE *fp, int *err)
 1189 {
 1190     char line[1024];
 1191     char sname[VNAMELEN];
 1192     char fmt[24];
 1193     int y0, p0, y1, p1;
 1194     int pd, offset;
 1195     int nf, i = 0, nseries = 0;
 1196 
 1197     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
 1198 
 1199     while (fgets(line, sizeof line, fp)) {
 1200     if (i == 0 && strncmp(line, "pcgive 700", 10)) {
 1201         *err = 1;
 1202         gretl_errmsg_set("This is not a PcGive 700 data file");
 1203         return 0;
 1204     }
 1205     if (*line == '>') {
 1206         nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
 1207             &p1, &pd, &offset);
 1208         if (nf < 7 || y0 < 0 || p0 < 0 || y1 < 0 || p1 < 0 ||
 1209         pd < 1 || offset < 0) {
 1210         fprintf(stderr, "Error reading series info\n");
 1211         } else {
 1212         nseries++;
 1213         }
 1214     }
 1215     i++;
 1216     }
 1217 
 1218     return nseries;
 1219 }
 1220 
 1221 /**
 1222  * read_pcgive_db:
 1223  * @fname: name of database file.
 1224  * @fp: pre-opened stream (caller to close it)
 1225  *
 1226  * Read the series info from a PcGive database, .in7 file
 1227  *
 1228  * Returns: pointer to a #dbwrapper containing the series info,
 1229  * or NULL in case of failure.
 1230  */
 1231 
 1232 dbwrapper *read_pcgive_db (const char *fname, FILE *fp)
 1233 {
 1234     dbwrapper *dw;
 1235     int ns, err = 0;
 1236 
 1237     gretl_error_clear();
 1238 
 1239     ns = count_in7_series(fp, &err);
 1240     if (ns == 0) {
 1241     if (!err) {
 1242         gretl_errmsg_set(_("No valid series found"));
 1243     }
 1244     return NULL;
 1245     }
 1246 
 1247 #if DB_DEBUG
 1248     fprintf(stderr, "in7: found %d series\n", ns);
 1249 #endif
 1250 
 1251     /* allocate table for series rows */
 1252     dw = dbwrapper_new(ns, fname, GRETL_PCGIVE_DB);
 1253     if (dw == NULL) {
 1254     gretl_errmsg_set(_("Out of memory!"));
 1255     return NULL;
 1256     }
 1257 
 1258     rewind(fp);
 1259 
 1260     /* Go find the series info */
 1261     err = read_in7_series_info(fp, dw);
 1262 
 1263     if (err) {
 1264     dbwrapper_destroy(dw);
 1265     dw = NULL;
 1266     }
 1267 
 1268     return dw;
 1269 }
 1270 
 1271 /**
 1272  * read_rats_db:
 1273  * @fname: database filename.
 1274  * @fp: pre-opened stream (caller to close it)
 1275  *
 1276  * Read the series info from a RATS 4.0 database: read the base
 1277  * block at offset 0 in the data file, and recurse through the
 1278  * directory entries.
 1279  *
 1280  * Returns: pointer to a #dbwrapper containing the series info,
 1281  * or NULL in case of failure.
 1282  */
 1283 
 1284 dbwrapper *read_rats_db (const char *fname, FILE *fp)
 1285 {
 1286     dbwrapper *dw;
 1287     long forward = 0;
 1288     int i, err = 0;
 1289 
 1290     gretl_error_clear();
 1291 
 1292     /* get into position */
 1293     fseek(fp, 30L, SEEK_SET); /* skip unneeded fields */
 1294     if (fread(&forward, sizeof forward, 1, fp) == 1) {
 1295     fseek(fp, 4L, SEEK_CUR);
 1296     }
 1297 
 1298     /* basic check */
 1299     if (forward <= 0) {
 1300     gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
 1301     fprintf(stderr, "rats database: got forward = %ld\n", forward);
 1302     return NULL;
 1303     }
 1304 
 1305     /* allocate table for series rows */
 1306     dw = dbwrapper_new(0, fname, GRETL_RATS_DB);
 1307     if (dw == NULL) {
 1308     gretl_errmsg_set(_("Out of memory!"));
 1309     return NULL;
 1310     }
 1311 
 1312     /* Go find the series */
 1313     i = 0;
 1314     while (forward && !err) {
 1315     dw->nv += 1;
 1316 #if DB_DEBUG
 1317     fprintf(stderr, "read_rats_db: forward = %d, nv = %d\n",
 1318         (int) forward, dw->nv);
 1319 #endif
 1320     if (dw->nv > 0 && dw->nv % DB_INIT_ROWS == 0) {
 1321         err = dbwrapper_expand(dw);
 1322         if (err) {
 1323         gretl_errmsg_set(_("Out of memory!"));
 1324         }
 1325     }
 1326     if (!err) {
 1327         err = fseek(fp, (forward - 1) * 256L, SEEK_SET);
 1328         if (!err) {
 1329         forward = read_rats_directory(fp, NULL, &dw->sinfo[i++]);
 1330         if (forward == RATS_PARSE_ERROR) {
 1331             err = 1;
 1332         }
 1333         }
 1334     }
 1335 #if DB_DEBUG
 1336     fprintf(stderr, "bottom of loop, err = %d\n", err);
 1337 #endif
 1338     }
 1339 
 1340 #if DB_DEBUG
 1341     fprintf(stderr, "read_rats_db: err = %d, dw = %p\n",
 1342         err, (void *) dw);
 1343 #endif
 1344 
 1345     if (err) {
 1346     dbwrapper_destroy(dw);
 1347     return NULL;
 1348     }
 1349 
 1350     return dw;
 1351 }
 1352 
 1353 /* retrieve the actual data values from the data blocks */
 1354 
 1355 static int get_rats_series (int offset, SERIESINFO *sinfo, FILE *fp,
 1356                 double **Z)
 1357 {
 1358     RATSData rdata;
 1359     double x;
 1360     int v = sinfo->v;
 1361     int i, t, T;
 1362     int miss = 0;
 1363     int err = 0;
 1364 
 1365     fprintf(stderr, "get_rats_series: starting from offset %d\n", offset);
 1366 
 1367     if (sinfo->t2 > 0) {
 1368     T = sinfo->t2 + 1;
 1369     } else {
 1370     T = sinfo->nobs;
 1371     }
 1372 
 1373     rdata.forward_point = offset;
 1374     t = sinfo->t1;
 1375 
 1376     while (rdata.forward_point) {
 1377     fseek(fp, (rdata.forward_point - 1) * 256L, SEEK_SET);
 1378     /* the RATSData struct is actually 256 bytes.  Yay! */
 1379     if (fread(&rdata, sizeof rdata, 1, fp) != 1) {
 1380         err = E_DATA;
 1381         break;
 1382     }
 1383     for (i=0; i<31 && t<T; i++) {
 1384         x = rdata.data[i];
 1385 #if G_BYTE_ORDER == G_BIG_ENDIAN
 1386         reverse_double(x);
 1387 #endif
 1388         if (isnan(x)) {
 1389         x = NADBL;
 1390         miss = 1;
 1391         }
 1392         Z[v][t++] = x;
 1393     }
 1394     }
 1395 
 1396     if (miss && !err) {
 1397     err = DB_MISSING_DATA;
 1398     }
 1399 
 1400     return err;
 1401 }
 1402 
 1403 /**
 1404  * get_rats_db_data:
 1405  * @fname: name of RATS 4.0 database to read from
 1406  * @sinfo: holds info on the given series (input)
 1407  * @Z: data matrix
 1408  *
 1409  * Read the actual data values for a series from a RATS database.
 1410  *
 1411  * Returns: 0 on successful completion, E_FOPEN if
 1412  * the data could not be read, and DB_MISSING_DATA if the
 1413  * data were found but there were some missing values.
 1414  */
 1415 
 1416 int get_rats_db_data (const char *fname, SERIESINFO *sinfo,
 1417               double **Z)
 1418 {
 1419     FILE *fp;
 1420     int err = 0;
 1421 
 1422     fp = gretl_fopen(fname, "rb");
 1423     if (fp == NULL) {
 1424     err = E_FOPEN;
 1425     } else {
 1426     err = get_rats_series(sinfo->offset, sinfo, fp, Z);
 1427     fclose(fp);
 1428     }
 1429 
 1430     return err;
 1431 }
 1432 
 1433 static int get_rats_series_info (const char *series_name, SERIESINFO *sinfo)
 1434 {
 1435     FILE *fp;
 1436     long forward = 0;
 1437     int err = 0;
 1438 
 1439     gretl_error_clear();
 1440 
 1441     fp = gretl_fopen(saved_db_name, "rb");
 1442     if (fp == NULL) {
 1443     return E_FOPEN;
 1444     }
 1445 
 1446 #if DB_DEBUG
 1447     fprintf(stderr, "Opened %s\n", saved_db_name);
 1448 #endif
 1449 
 1450     /* get into position */
 1451     fseek(fp, 30L, SEEK_SET);
 1452     if (fread(&forward, sizeof forward, 1, fp) == 1) {
 1453     fseek(fp, 4L, SEEK_CUR);
 1454     }
 1455 
 1456     /* basic check */
 1457     if (forward <= 0) {
 1458     gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
 1459     fprintf(stderr, "rats database: got forward = %ld\n", forward);
 1460     return DB_PARSE_ERROR;
 1461     }
 1462 
 1463     sinfo->offset = 0;
 1464 
 1465     /* Go find the series */
 1466     while (forward) {
 1467     fseek(fp, (forward - 1) * 256L, SEEK_SET);
 1468     forward = read_rats_directory(fp, series_name, sinfo);
 1469     if (forward == RATS_PARSE_ERROR) {
 1470         sinfo->offset = -1;
 1471     }
 1472     if (sinfo->offset != 0) {
 1473         break;
 1474     }
 1475     }
 1476 
 1477     fclose(fp);
 1478 
 1479     if (sinfo->offset < 0) {
 1480     err = DB_NO_SUCH_SERIES;
 1481     }
 1482 
 1483 #if DB_DEBUG
 1484     fprintf(stderr, "get_rats_series_info: offset = %d\n", sinfo->offset);
 1485     fprintf(stderr, " pd = %d, nobs = %d\n", sinfo->pd, sinfo->nobs);
 1486 #endif
 1487 
 1488     return err;
 1489 }
 1490 
 1491 /* For importation of database series */
 1492 
 1493 static double *get_compacted_xt (const double *src, int n,
 1494                  CompactMethod method,
 1495                  int compfac, int skip)
 1496 {
 1497     int p, t;
 1498     double *x;
 1499 
 1500     x = malloc(n * sizeof *x);
 1501     if (x == NULL) {
 1502     return NULL;
 1503     }
 1504 
 1505     for (t=0; t<n; t++) {
 1506     p = (t + 1) * compfac;
 1507     x[t] = 0.0;
 1508     if (method == COMPACT_AVG || method == COMPACT_SUM) {
 1509         int i, st;
 1510 
 1511         for (i=1; i<=compfac; i++) {
 1512         st = p - i + skip;
 1513         if (na(src[st])) {
 1514             x[t] = NADBL;
 1515             break;
 1516         } else {
 1517             x[t] += src[st];
 1518         }
 1519         }
 1520         if (method == COMPACT_AVG) {
 1521         x[t] /= (double) compfac;
 1522         }
 1523     } else if (method == COMPACT_EOP) {
 1524         x[t] = src[p - 1 + skip];
 1525     } else if (method == COMPACT_SOP) {
 1526         x[t] = src[p - compfac + skip];
 1527     }
 1528     }
 1529 
 1530     return x;
 1531 }
 1532 
 1533 /* Compact a single series from a database, for importation
 1534    into a working dataset of lower frequency.  At present
 1535    this is permitted only for the cases:
 1536 
 1537      quarterly -> annual
 1538      monthly   -> quarterly
 1539      monthly   -> annual
 1540 */
 1541 
 1542 static double *compact_db_series (const double *src,
 1543                   int pd, int *pnobs,
 1544                   char *stobs,
 1545                   int target_pd,
 1546                   CompactMethod method)
 1547 {
 1548     int p0, y0, endskip, goodobs;
 1549     int skip = 0, compfac = pd / target_pd;
 1550     double *x;
 1551 
 1552     if (target_pd == 1) {
 1553     /* figure the annual dates */
 1554     y0 = atoi(stobs);
 1555     p0 = atoi(stobs + 5);
 1556     if (p0 != 1) {
 1557         ++y0;
 1558         skip = compfac - (p0 + 1);
 1559     }
 1560     sprintf(stobs, "%d", y0);
 1561     } else if (target_pd == 4) {
 1562     /* figure the quarterly dates */
 1563     float q;
 1564     int q0;
 1565 
 1566     y0 = atoi(stobs);
 1567     p0 = atoi(stobs + 5);
 1568     q = 1.0 + p0 / 3.;
 1569     q0 = q + .5;
 1570     skip = ((q0 - 1) * 3) + 1 - p0;
 1571     if (q0 == 5) {
 1572         y0++;
 1573         q0 = 1;
 1574     }
 1575     sprintf(stobs, "%d.%d", y0, q0);
 1576     } else {
 1577     return NULL;
 1578     }
 1579 
 1580     endskip = (*pnobs - skip) % compfac;
 1581     goodobs = (*pnobs - skip - endskip) / compfac;
 1582     *pnobs = goodobs;
 1583 
 1584 #if DB_DEBUG
 1585     fprintf(stderr, "startskip = %d\n", skip);
 1586     fprintf(stderr, "endskip = %d\n", endskip);
 1587     fprintf(stderr, "goodobs = %d\n", goodobs);
 1588     fprintf(stderr, "compfac = %d\n", compfac);
 1589     fprintf(stderr, "starting date = %s\n", stobs);
 1590 #endif
 1591 
 1592     x = get_compacted_xt(src, goodobs, method, compfac, skip);
 1593 
 1594     return x;
 1595 }
 1596 
 1597 static double *interpolate_db_series (const double *src,
 1598                       int oldn, int mult,
 1599                       int *err)
 1600 {
 1601     gretl_matrix *yx;
 1602     gretl_matrix *y;
 1603     double *ret = NULL;
 1604     int t;
 1605 
 1606     y = gretl_column_vector_alloc(oldn);
 1607     if (y == NULL) {
 1608     *err = E_ALLOC;
 1609     return NULL;
 1610     }
 1611 
 1612     for (t=0; t<oldn; t++) {
 1613     y->val[t] = src[t];
 1614     }
 1615 
 1616     yx = matrix_chowlin(y, NULL, mult, err);
 1617     gretl_matrix_free(y);
 1618 
 1619     if (!*err) {
 1620     ret = yx->val;
 1621     yx->val = NULL;
 1622     }
 1623 
 1624     gretl_matrix_free(yx);
 1625 
 1626     return ret;
 1627 }
 1628 
 1629 #define EXPAND_DEBUG 0
 1630 
 1631 /* Expand a single series from a database, for importation
 1632    into a working dataset of higher frequency.  At present
 1633    this is permitted only for the cases:
 1634 
 1635    1) annual    -> quarterly
 1636    2) annual    -> monthly
 1637    3) quarterly -> monthly
 1638 
 1639    Interpolation is supported for cases 1 and 3 only.
 1640 */
 1641 
 1642 static double *expand_db_series (const double *src,
 1643                  int pd, int *pnobs,
 1644                  char *stobs, int target_pd,
 1645                  int interpol)
 1646 {
 1647     char new_stobs[OBSLEN] = {0};
 1648     int oldn = *pnobs;
 1649     int mult, newn;
 1650     double *x = NULL;
 1651     int j, t;
 1652     int err = 0;
 1653 
 1654     mult = target_pd / pd;
 1655     newn = mult * oldn;
 1656 
 1657     if (!((target_pd == 4 && pd == 1) ||
 1658       (target_pd == 12 && pd == 4))) {
 1659     interpol = 0;
 1660     }
 1661 
 1662     if (interpol) {
 1663     x = interpolate_db_series(src, oldn, mult, &err);
 1664     } else {
 1665     x = malloc(newn * sizeof *x);
 1666     if (x == NULL) {
 1667         err = E_ALLOC;
 1668     } else {
 1669         int s = 0;
 1670 
 1671         for (t=0; t<oldn; t++) {
 1672         for (j=0; j<mult; j++) {
 1673             x[s++] = src[t];
 1674         }
 1675         }
 1676     }
 1677     }
 1678 
 1679 #if EXPAND_DEBUG
 1680     fprintf(stderr, "expand_db_series 1: mult=%d, newn=%d, stobs='%s'\n",
 1681         mult, newn, stobs);
 1682 #endif
 1683 
 1684     if (err) {
 1685     return NULL;
 1686     }
 1687 
 1688     if (pd == 1) {
 1689     strcpy(new_stobs, stobs);
 1690     if (target_pd == 4) {
 1691         strcat(new_stobs, ":1");
 1692     } else {
 1693         strcat(new_stobs, ":01");
 1694     }
 1695     } else {
 1696     int yr, qtr, mo;
 1697 
 1698     sscanf(stobs, "%d.%d", &yr, &qtr);
 1699     mo = (qtr - 1) * 3 + 1;
 1700     sprintf(new_stobs, "%d:%02d", yr, mo);
 1701     }
 1702 
 1703     /* revise incoming values */
 1704     strcpy(stobs, new_stobs);
 1705     *pnobs = newn;
 1706 
 1707 #if EXPAND_DEBUG
 1708     fprintf(stderr, "expand_db_series 2: pd=%d, stobs='%s'\n",
 1709         *ppd, stobs);
 1710 #endif
 1711 
 1712     return x;
 1713 }
 1714 
 1715 int set_db_name (const char *fname, int filetype, PRN *prn)
 1716 {
 1717     FILE *fp;
 1718     int err = 0;
 1719 
 1720     *saved_db_name = '\0';
 1721     if (fname != NULL) {
 1722     strncat(saved_db_name, fname, MAXLEN - 1);
 1723     }
 1724 
 1725     if (filetype == GRETL_DBNOMICS || filetype == 0) {
 1726     saved_db_type = filetype;
 1727     return 0;
 1728     }
 1729 
 1730     if (filetype == GRETL_NATIVE_DB_WWW) {
 1731 #ifdef USE_CURL
 1732     int n = strlen(saved_db_name);
 1733 
 1734     if (n > 4) {
 1735         n -= 4;
 1736         if (!strcmp(saved_db_name + n, ".bin")) {
 1737         saved_db_name[n] = '\0';
 1738         }
 1739     }
 1740     err = check_remote_db(saved_db_name);
 1741     if (!err) {
 1742         saved_db_type = filetype;
 1743         pprintf(prn, "%s\n", saved_db_name);
 1744     }
 1745 #else
 1746     pprintf(prn, _("Internet access not supported"));
 1747     pputc(prn, '\n');
 1748     err = E_DATA;
 1749 #endif
 1750     return err;
 1751     }
 1752 
 1753     fp = gretl_fopen(saved_db_name, "rb");
 1754 
 1755     if (fp == NULL && !g_path_is_absolute(saved_db_name) &&
 1756     filetype == GRETL_NATIVE_DB) {
 1757     /* try looking a bit more */
 1758     const char *path = gretl_binbase();
 1759 
 1760     if (path != NULL && *path != '\0') {
 1761         gretl_build_path(saved_db_name, path, fname, NULL);
 1762         fp = gretl_fopen(saved_db_name, "rb");
 1763     }
 1764 
 1765 #ifdef OS_OSX
 1766     if (fp == NULL) {
 1767         gchar *tmp = g_build_filename(gretl_app_support_dir(), "db",
 1768                       fname, NULL);
 1769 
 1770         fp = gretl_fopen(tmp, "rb");
 1771         if (fp != NULL) {
 1772         strcpy(saved_db_name, tmp);
 1773         }
 1774         g_free(tmp);
 1775     }
 1776 #endif
 1777     }
 1778 
 1779     if (fp == NULL) {
 1780     *saved_db_name = '\0';
 1781     pprintf(prn, _("Couldn't open %s\n"), fname);
 1782     err = E_FOPEN;
 1783     } else {
 1784     fclose(fp);
 1785     saved_db_type = filetype;
 1786     pprintf(prn, "%s\n", saved_db_name);
 1787     }
 1788 
 1789     return err;
 1790 }
 1791 
 1792 const char *get_db_name (void)
 1793 {
 1794     return saved_db_name;
 1795 }
 1796 
 1797 /* Handling of DSN setup for ODBC: grab the dsn, username
 1798    and password strings.
 1799 */
 1800 
 1801 static char *get_dsn_field (const char *tag, const char *src)
 1802 {
 1803     const char *p;
 1804     char needle[12];
 1805     char *ret = NULL;
 1806 
 1807     sprintf(needle, "%s=", tag);
 1808     p = strstr(src, needle);
 1809 
 1810     if (p != NULL) {
 1811     p += strlen(needle);
 1812     if (*p == '"' || *p == '\'') {
 1813         ret = gretl_quoted_string_strdup(p, NULL);
 1814     } else {
 1815         ret = gretl_strndup(p, strcspn(p, " "));
 1816     }
 1817     }
 1818 
 1819     return ret;
 1820 }
 1821 
 1822 static ODBC_info gretl_odinfo;
 1823 
 1824 static void ODBC_info_clear_read (void)
 1825 {
 1826     int i;
 1827 
 1828     free(gretl_odinfo.query);
 1829     gretl_odinfo.query = NULL;
 1830 
 1831     doubles_array_free(gretl_odinfo.X, gretl_odinfo.nvars);
 1832     gretl_odinfo.X = NULL;
 1833 
 1834     strings_array_free(gretl_odinfo.S, gretl_odinfo.nrows);
 1835     gretl_odinfo.S = NULL;
 1836 
 1837     gretl_string_table_destroy(gretl_odinfo.gst);
 1838     gretl_odinfo.gst = NULL;
 1839 
 1840     for (i=0; i<ODBC_OBSCOLS; i++) {
 1841     gretl_odinfo.coltypes[i] = 0;
 1842     }
 1843 
 1844     if (gretl_odinfo.fmts != NULL) {
 1845     strings_array_free(gretl_odinfo.fmts, gretl_odinfo.obscols);
 1846     gretl_odinfo.fmts = NULL;
 1847     }
 1848 
 1849     gretl_odinfo.nrows = 0;
 1850     gretl_odinfo.obscols = 0;
 1851     gretl_odinfo.nvars = 0;
 1852 }
 1853 
 1854 static void gretl_odbc_cleanup (void)
 1855 {
 1856     free(gretl_odinfo.dsn);
 1857     gretl_odinfo.dsn = NULL;
 1858 
 1859     free(gretl_odinfo.username);
 1860     gretl_odinfo.username = NULL;
 1861 
 1862     free(gretl_odinfo.password);
 1863     gretl_odinfo.password = NULL;
 1864 
 1865     ODBC_info_clear_read();
 1866 }
 1867 
 1868 int set_odbc_dsn (const char *line, PRN *prn)
 1869 {
 1870     int (*check_dsn) (ODBC_info *);
 1871     char *dbname = NULL;
 1872     char *uname = NULL;
 1873     char *pword = NULL;
 1874     int got_plugin = 0;
 1875     int err = 0;
 1876 
 1877     gretl_odbc_cleanup();
 1878 
 1879     dbname = get_dsn_field("dsn", line);
 1880     if (dbname == NULL) {
 1881     pputs(prn, "You must specify a DSN using 'dsn=...'\n");
 1882     return E_DATA;
 1883     }
 1884 
 1885     uname = get_dsn_field("user", line);
 1886     pword = get_dsn_field("password", line);
 1887 
 1888     gretl_odinfo.dsn = dbname;
 1889     gretl_odinfo.username = uname;
 1890     gretl_odinfo.password = pword;
 1891 
 1892     gretl_error_clear();
 1893 
 1894     check_dsn = get_plugin_function("gretl_odbc_check_dsn");
 1895 
 1896     if (check_dsn == NULL) {
 1897         err = 1;
 1898     } else {
 1899     got_plugin = 1;
 1900         err = (*check_dsn) (&gretl_odinfo);
 1901     }
 1902 
 1903     if (err) {
 1904     if (!got_plugin) {
 1905         pprintf(prn, "Couldn't open the gretl ODBC plugin\n");
 1906     } else {
 1907         pprintf(prn, "Failed to connect to ODBC data source '%s'\n",
 1908             gretl_odinfo.dsn);
 1909     }
 1910     gretl_odbc_cleanup();
 1911     } else if (gretl_messages_on()) {
 1912     pprintf(prn, "Connected to ODBC data source '%s'\n",
 1913         gretl_odinfo.dsn);
 1914     }
 1915 
 1916     return err;
 1917 }
 1918 
 1919 int db_set_sample (const char *start, const char *stop, DATASET *dset)
 1920 {
 1921     int t1 = 0, t2 = 0;
 1922 
 1923     if (strcmp(start, ";")) {
 1924     t1 = dateton(start, dset);
 1925     if (t1 < 0) {
 1926         return 1;
 1927     }
 1928     }
 1929 
 1930     t2 = dateton(stop, dset);
 1931     if (t2 < 0) {
 1932     return 1;
 1933     }
 1934 
 1935     if (t1 > t2) {
 1936     gretl_errmsg_set(_("Invalid null sample"));
 1937     return 1;
 1938     }
 1939 
 1940     dset->t1 = t1;
 1941     dset->t2 = t2;
 1942     dset->n = t2 - t1 + 1;
 1943     strcpy(dset->endobs, stop);
 1944 
 1945 #if DB_DEBUG
 1946     fprintf(stderr, "db_set_sample: t1=%d, t2=%d, stobs='%s', endobs='%s' "
 1947         "sd0 = %g, n = %d\n",
 1948         dset->t1, dset->t2,
 1949         dset->stobs, dset->endobs,
 1950         dset->sd0, dset->n);
 1951 #endif
 1952 
 1953     return 0;
 1954 }
 1955 
 1956 static const char *
 1957 get_word_and_advance (const char *s, char *word, size_t maxlen)
 1958 {
 1959     size_t i = 0;
 1960 
 1961     while (isspace(*s)) s++;
 1962 
 1963     *word = '\0';
 1964 
 1965     while (*s && !isspace(*s)) {
 1966     if (i < maxlen) word[i++] = *s;
 1967     s++;
 1968     }
 1969 
 1970     word[i] = '\0';
 1971 
 1972     return (*word != '\0')? s : NULL;
 1973 }
 1974 
 1975 static const char *
 1976 get_compact_method_and_advance (const char *s, CompactMethod *method)
 1977 {
 1978     const char *p;
 1979 
 1980     *method = COMPACT_NONE;
 1981 
 1982     if ((p = strstr(s, "(compact")) != NULL) {
 1983     char comp[8];
 1984     int i = 0;
 1985 
 1986     p += 8;
 1987     while (*p && *p != ')' && i < 7) {
 1988         if (!isspace(*p) && *p != '=') {
 1989         comp[i++] = *p;
 1990         }
 1991         p++;
 1992     }
 1993     comp[i] = '\0';
 1994 
 1995     if (!strcmp(comp, "average")) {
 1996         *method = COMPACT_AVG;
 1997     } else if (!strcmp(comp, "sum")) {
 1998         *method = COMPACT_SUM;
 1999     } else if (!strcmp(comp, "first")) {
 2000         *method = COMPACT_SOP;
 2001     } else if (!strcmp(comp, "last")) {
 2002         *method = COMPACT_EOP;
 2003     } else if (!strcmp(comp, "spread")) {
 2004         *method = COMPACT_SPREAD;
 2005     }
 2006 
 2007     p = strchr(p, ')');
 2008     if (p != NULL) p++;
 2009     } else if ((p = strstr(s, "data ")) != NULL) {
 2010     p += 5;
 2011     } else {
 2012     p = s;
 2013     }
 2014 
 2015     return p;
 2016 }
 2017 
 2018 static CompactMethod compact_method_from_option (int *err)
 2019 {
 2020     const char *s = get_optval_string(DATA, OPT_C);
 2021     CompactMethod method = COMPACT_NONE;
 2022 
 2023     if (s == NULL || *s == '\0') {
 2024     *err = E_PARSE;
 2025     } else if (!strcmp(s, "average")) {
 2026     method = COMPACT_AVG;
 2027     } else if (!strcmp(s, "sum")) {
 2028     method = COMPACT_SUM;
 2029     } else if (!strcmp(s, "first")) {
 2030     method = COMPACT_SOP;
 2031     } else if (!strcmp(s, "last")) {
 2032     method = COMPACT_EOP;
 2033     } else if (!strcmp(s, "spread")) {
 2034     method = COMPACT_SPREAD;
 2035     } else {
 2036     gretl_errmsg_sprintf(_("field '%s' in command is invalid"), s);
 2037     *err = E_PARSE;
 2038     }
 2039 
 2040     return method;
 2041 }
 2042 
 2043 /* 2-D array of doubles, allocated space in second
 2044    position (as in a DATASET) */
 2045 
 2046 static double **new_dbZ (int n)
 2047 {
 2048     double **Z;
 2049     int t;
 2050 
 2051     Z = malloc(2 * sizeof *Z);
 2052     if (Z == NULL) return NULL;
 2053 
 2054     Z[0] = NULL;
 2055     Z[1] = malloc(n * sizeof **Z);
 2056 
 2057     if (Z[1] == NULL) {
 2058     free(Z);
 2059     return NULL;
 2060     }
 2061 
 2062     for (t=0; t<n; t++) {
 2063     Z[1][t] = NADBL;
 2064     }
 2065 
 2066     return Z;
 2067 }
 2068 
 2069 static void free_dbZ (double **dbZ)
 2070 {
 2071     if (dbZ != NULL) {
 2072     free(dbZ[1]);
 2073     free(dbZ);
 2074     }
 2075 }
 2076 
 2077 static int parse_odbc_format_chunk (char **ps, int i)
 2078 {
 2079     const char *numchars = "0123456789";
 2080     char *chunk = NULL;
 2081     char *p = *ps;
 2082     int n, err = 0;
 2083 
 2084     /* advance to '%' */
 2085     while (*p && *p != '%') p++;
 2086     if (*p == '\0') {
 2087     return E_PARSE;
 2088     }
 2089 
 2090     p++; /* move past '%' */
 2091 
 2092     /* zero padding? */
 2093     if (*p == '0') {
 2094     p++;
 2095     }
 2096 
 2097     /* optional width? */
 2098     n = strspn(p, numchars);
 2099     if (n == 1) {
 2100     p++;
 2101     } else if (n > 0) {
 2102     return E_PARSE;
 2103     }
 2104 
 2105     /* optional dot plus precision? */
 2106     if (*p == '.') {
 2107     p++;
 2108     n = strspn(p, numchars);
 2109     if (n == 1) {
 2110         p++;
 2111     } else {
 2112         return E_PARSE;
 2113     }
 2114     }
 2115 
 2116     /* now we should have a conversion character */
 2117     if (*p == 'd') {
 2118     gretl_odinfo.coltypes[i] = GRETL_TYPE_INT;
 2119     } else if (*p == 's') {
 2120     gretl_odinfo.coltypes[i] = GRETL_TYPE_STRING;
 2121     } else if (*p == 'f' || *p == 'g') {
 2122     gretl_odinfo.coltypes[i] = GRETL_TYPE_DOUBLE;
 2123     } else if (*p == 'D') {
 2124     *p = 's';
 2125     gretl_odinfo.coltypes[i] = GRETL_TYPE_DATE;
 2126     } else {
 2127     return E_PARSE;
 2128     }
 2129 
 2130     /* append any trailing fixed chars */
 2131     p++;
 2132     while (*p && *p != '%') p++;
 2133     n = p - *ps;
 2134 
 2135     chunk = gretl_strndup(*ps, n);
 2136     if (chunk == NULL) {
 2137     err = E_ALLOC;
 2138     } else {
 2139     err = strings_array_add(&gretl_odinfo.fmts,
 2140                 &gretl_odinfo.obscols,
 2141                 chunk);
 2142     free(chunk);
 2143     }
 2144 
 2145     *ps = p;
 2146 
 2147 #if 1
 2148     fprintf(stderr, "set obs coltype[%d] = %d (%s), fmt='%s'\n", i,
 2149         gretl_odinfo.coltypes[i],
 2150         gretl_type_get_name(gretl_odinfo.coltypes[i]),
 2151         gretl_odinfo.fmts[i]);
 2152 #endif
 2153 
 2154     return err;
 2155 }
 2156 
 2157 static int parse_odbc_format (char *fmt)
 2158 {
 2159     char *s = fmt;
 2160     int i, err = 0;
 2161 
 2162     for (i=0; i<ODBC_OBSCOLS && !err && *s; i++) {
 2163     err = parse_odbc_format_chunk(&s, i);
 2164     }
 2165 
 2166     if (!err && *s != '\0') {
 2167     err = E_PARSE;
 2168     }
 2169 
 2170     free(fmt);
 2171 
 2172     return err;
 2173 }
 2174 
 2175 static char *odbc_get_query (const char *s, int *err)
 2176 {
 2177     char *query = NULL;
 2178     const char *p;
 2179 
 2180     if (*s == '"') {
 2181     query = gretl_quoted_string_strdup(s, NULL);
 2182     } else {
 2183     p = get_string_by_name(s);
 2184     if (p != NULL) {
 2185         query = gretl_strdup(p);
 2186     } else {
 2187         query = gretl_strdup(s);
 2188     }
 2189     }
 2190 
 2191     if (query == NULL) {
 2192     *err = E_ALLOC;
 2193     } else if (*query == '\0') {
 2194     gretl_errmsg_set(_("Expected an SQL query string"));
 2195     *err = E_PARSE;
 2196     }
 2197 
 2198     return query;
 2199 }
 2200 
 2201 /* Grab the series name(s) out of an ODBC "data" command.  If the SQL
 2202    query is marked by "query=" (which was not required in the original
 2203    gretl ODBC setup) we're able to get multiple series names,
 2204    otherwise we're restricted to one.
 2205 */
 2206 
 2207 static char **odbc_get_varnames (const char **line, int *err)
 2208 {
 2209     char **vnames = NULL;
 2210     char vname[VNAMELEN];
 2211     const char *s = *line;
 2212     int len, loop_ok = 0, nv = 0;
 2213 
 2214     if (strstr(s, "query=")) {
 2215     /* we know where the SQL query starts */
 2216     loop_ok = 1;
 2217     }
 2218 
 2219     while (!*err) {
 2220     *vname = '\0';
 2221     *err = extract_varname(vname, s, &len);
 2222 
 2223     if (!*err && len == 0) {
 2224         gretl_errmsg_set(_("Expected a valid variable name"));
 2225         *err = E_PARSE;
 2226     }
 2227 
 2228     if (!*err) {
 2229         *err = check_varname(vname);
 2230     }
 2231 
 2232     if (!*err) {
 2233         *err = strings_array_add(&vnames, &nv, vname);
 2234     }
 2235 
 2236     if (!*err) {
 2237         s += len;
 2238         s += strspn(s, " ");
 2239     }
 2240 
 2241     if (!loop_ok || *s == '\0' || !strncmp(s, "obs-", 4) ||
 2242         !strncmp(s, "query=", 6)) {
 2243         /* got to the end of the varnames section */
 2244         break;
 2245     }
 2246     }
 2247 
 2248     if (*err) {
 2249     strings_array_free(vnames, nv);
 2250     vnames = NULL;
 2251     } else {
 2252     gretl_odinfo.nvars = nv;
 2253     }
 2254 
 2255     *line = s;
 2256 
 2257     return vnames;
 2258 }
 2259 
 2260 static double s_tab_get (int i, int t, series_table *stl, series_table *str)
 2261 {
 2262     const double *x = gretl_odinfo.X[i];
 2263     const char *sr;
 2264     double ret = NADBL;
 2265 
 2266     /* get the string value for the imported obs */
 2267     sr = series_table_get_string(str, x[t]);
 2268     /* look up its index "on the left" */
 2269     ret = series_table_get_value(stl, sr);
 2270     if (na(ret)) {
 2271     /* not found: so try adding it to the LHS table */
 2272     series_table_add_string(stl, sr);
 2273     ret = series_table_get_value(stl, sr);
 2274     }
 2275 
 2276     return ret;
 2277 }
 2278 
 2279 static int m2q (int m)
 2280 {
 2281     if (m == 1) return 1;
 2282     else if (m == 4) return 2;
 2283     else if (m == 7) return 3;
 2284     else if (m == 10) return 4;
 2285     else return -1;
 2286 }
 2287 
 2288 static int try_iso_8601 (const char *s, DATASET *dset)
 2289 {
 2290     int t = -1;
 2291 
 2292     if (dataset_is_time_series(dset)) {
 2293     char obsstr[OBSLEN] = {0};
 2294     int y, m, d;
 2295 
 2296     if (sscanf(s, "%d-%d-%d", &y, &m, &d) == 3) {
 2297         if (dset->pd == 4 && d == 1) {
 2298         sprintf(obsstr, "%04d:%d", y, m2q(m));
 2299         } else if (dset->pd == 12 && d == 1) {
 2300         sprintf(obsstr, "%04d:%02d", y, m);
 2301         } else if (dset->pd == 1 && m == 1 && d == 1) {
 2302         sprintf(obsstr, "%04d", y);
 2303         }
 2304         t = dateton(obsstr, dset);
 2305     }
 2306     }
 2307 
 2308     return t;
 2309 }
 2310 
 2311 static int odbc_transcribe_data (char **vnames, DATASET *dset,
 2312                  int vmin, int newvars,
 2313                  gretlopt opt, PRN *prn)
 2314 {
 2315     char label[MAXLABEL];
 2316     int *gstlist = NULL;
 2317     int nv = gretl_odinfo.nvars;
 2318     int n = gretl_odinfo.nrows;
 2319     int nrepl = nv - newvars;
 2320     int simple_fill = (opt & OPT_F);
 2321     int i, s, t, v;
 2322     int err = 0;
 2323 
 2324     if (gretl_odinfo.gst != NULL) {
 2325     gstlist = string_table_copy_list(gretl_odinfo.gst);
 2326     }
 2327 
 2328     for (i=0; i<nv && !err; i++) {
 2329     series_table *str = NULL;
 2330     series_table *stl = NULL;
 2331     int vnew = 1; /* is this a new series? */
 2332     int obs_used = 0;
 2333 
 2334     if (nrepl > 0) {
 2335         /* we're replacing some series */
 2336         v = current_series_index(dset, vnames[i]);
 2337     } else {
 2338         /* all the series are new */
 2339         v = -1;
 2340     }
 2341 
 2342     if (v < 0) {
 2343         /* a new series */
 2344         v = vmin++;
 2345         strcpy(dset->varname[v], vnames[i]);
 2346         sprintf(label, "ODBC series %d", i + 1);
 2347         series_set_label(dset, v, label);
 2348     } else {
 2349         /* an existing series */
 2350         vnew = 0;
 2351         stl = series_get_string_table(dset, v);
 2352     }
 2353 
 2354     if (in_gretl_list(gstlist, i+1)) {
 2355         /* the imported data are string-valued */
 2356         if (vnew) {
 2357         gretl_string_table_reset_column_id(gretl_odinfo.gst, i+1, v);
 2358         } else if (stl == NULL) {
 2359         gretl_errmsg_sprintf("%s: can't mix numeric and string data",
 2360                      dset->varname[v]);
 2361         err = E_TYPES;
 2362         } else {
 2363         str = gretl_string_table_detach_col(gretl_odinfo.gst, i+1);
 2364         }
 2365         if (!err && gretl_messages_on()) {
 2366         pprintf(prn, "%s: string-valued\n", dset->varname[v]);
 2367         }
 2368     } else if (stl != NULL) {
 2369         /* string-valued in dataset, numeric data from ODBC */
 2370         gretl_errmsg_sprintf("%s: can't mix numeric and string data",
 2371                  dset->varname[v]);
 2372         err = E_TYPES;
 2373     }
 2374 
 2375     if (err) {
 2376         break;
 2377     }
 2378 
 2379     if (gretl_odinfo.S != NULL) {
 2380         /* got obs identifiers via ODBC */
 2381         if (vnew) {
 2382         for (t=0; t<dset->n; t++) {
 2383             dset->Z[v][t] = NADBL;
 2384         }
 2385         }
 2386         for (s=0; s<n; s++) {
 2387         t = dateton(gretl_odinfo.S[s], dset);
 2388         if (t < 0) {
 2389             t = try_iso_8601(gretl_odinfo.S[s], dset);
 2390         }
 2391         if (t >= dset->t1 && t <= dset->t2) {
 2392             if (str != NULL) {
 2393             dset->Z[v][t] = s_tab_get(i, s, stl, str);
 2394             } else {
 2395             dset->Z[v][t] = gretl_odinfo.X[i][s];
 2396             }
 2397             obs_used++;
 2398         } else {
 2399             fprintf(stderr, "Rejecting obs '%s'\n", gretl_odinfo.S[s]);
 2400         }
 2401         }
 2402     } else {
 2403         /* no obs identifiers via ODBC */
 2404         int ns = dset->t2 - dset->t1 + 1;
 2405 
 2406         if (n == ns || simple_fill) {
 2407         s = 0;
 2408         } else if (n == dset->n) {
 2409         s = dset->t1;
 2410         } else {
 2411         gretl_errmsg_sprintf("%s: don't know how to align the data!",
 2412                      dset->varname[v]);
 2413         err = E_DATA;
 2414         }
 2415         for (t=0; t<dset->n && !err; t++) {
 2416         if (t >= dset->t1 && t <= dset->t2 && s < n) {
 2417             if (str != NULL) {
 2418             dset->Z[v][t] = s_tab_get(i, s++, stl, str);
 2419             } else {
 2420             dset->Z[v][t] = gretl_odinfo.X[i][s++];
 2421             }
 2422             obs_used++;
 2423         } else if (vnew) {
 2424             dset->Z[v][t] = NADBL;
 2425         }
 2426         }
 2427     }
 2428 
 2429     if (str != NULL) {
 2430         series_table_destroy(str);
 2431     }
 2432 
 2433     if (!err && vnew && obs_used == 0) {
 2434         gretl_warnmsg_sprintf("ODBC import: '%s': no valid observations in sample range",
 2435                   vnames[i]);
 2436     }
 2437     }
 2438 
 2439     free(gstlist);
 2440 
 2441     if (err) {
 2442     dataset_drop_last_variables(dset, newvars);
 2443     if (gretl_odinfo.gst != NULL) {
 2444         gretl_string_table_destroy(gretl_odinfo.gst);
 2445         gretl_odinfo.gst = NULL;
 2446     }
 2447     } else if (gretl_odinfo.gst != NULL) {
 2448     gretl_string_table_save(gretl_odinfo.gst, dset);
 2449     }
 2450 
 2451     return err;
 2452 }
 2453 
 2454 static int odbc_count_new_vars (char **vnames, int nv,
 2455                 const DATASET *dset)
 2456 {
 2457     int newv = nv;
 2458 
 2459     if (dset->v > 0) {
 2460     int i;
 2461 
 2462     for (i=0; i<nv; i++) {
 2463         if (current_series_index(dset, vnames[i]) > 0) {
 2464         newv--;
 2465         }
 2466     }
 2467     }
 2468 
 2469     return newv;
 2470 }
 2471 
 2472 /* data series [obs-format=format-string] [query=]query-string */
 2473 
 2474 static int odbc_get_series (const char *line, DATASET *dset,
 2475                 gretlopt opt, PRN *prn)
 2476 {
 2477     int (*get_data) (ODBC_info *, gretlopt, PRN *);
 2478     char **vnames = NULL;
 2479     char *format = NULL;
 2480     int err = 0;
 2481 
 2482     if (gretl_odinfo.dsn == NULL) {
 2483     gretl_errmsg_set(_("No database has been opened"));
 2484     return 1;
 2485     } else if (dset->n == 0) {
 2486     return E_NODATA;
 2487     }
 2488 
 2489     /* get "series" field */
 2490     vnames = odbc_get_varnames(&line, &err);
 2491     if (err) {
 2492     return err;
 2493     }
 2494 
 2495     /* optional "obs-format" field */
 2496     if (!strncmp(line, "obs-format=", 11)) {
 2497     line += 11;
 2498     format = gretl_quoted_string_strdup(line, (const char **) &line);
 2499     if (format == NULL) {
 2500         err = E_PARSE;
 2501     } else {
 2502         err = parse_odbc_format(format);
 2503     }
 2504     }
 2505 
 2506     /* now the query to pass to the database */
 2507     if (!err) {
 2508     line += strspn(line, " ");
 2509     if (!strncmp(line, "query=", 6)) {
 2510         line += 6;
 2511     }
 2512     gretl_odinfo.query = odbc_get_query(line, &err);
 2513     }
 2514 
 2515     if (!err) {
 2516     if (opt & OPT_V) {
 2517         pprintf(prn, "SQL query: '%s'\n", gretl_odinfo.query);
 2518     }
 2519     gretl_error_clear();
 2520 
 2521     get_data = get_plugin_function("gretl_odbc_get_data");
 2522 
 2523     if (get_data == NULL) {
 2524         err = 1;
 2525     } else {
 2526         err = (*get_data) (&gretl_odinfo, opt, prn);
 2527     }
 2528     }
 2529 
 2530     if (!err) {
 2531     int n = gretl_odinfo.nrows;
 2532     int nv = gretl_odinfo.nvars;
 2533     int newvars, vmin = 1;
 2534 
 2535     if (gretl_messages_on()) {
 2536         pprintf(prn, "Retrieved %d observations on %d series via ODBC\n",
 2537             n, nv);
 2538     }
 2539 
 2540     if (dset->v == 0) {
 2541         /* the data array is still empty */
 2542         newvars = nv;
 2543         dset->v = 1 + nv;
 2544         err = start_new_Z(dset, 0);
 2545     } else {
 2546         newvars = odbc_count_new_vars(vnames, nv, dset);
 2547         vmin = dset->v;
 2548         if (newvars > 0) {
 2549         err = dataset_add_series(dset, newvars);
 2550         }
 2551     }
 2552 
 2553     if (!err) {
 2554         err = odbc_transcribe_data(vnames, dset, vmin, newvars, opt, prn);
 2555     }
 2556     }
 2557 
 2558     strings_array_free(vnames, gretl_odinfo.nvars);
 2559     ODBC_info_clear_read();
 2560 
 2561     return err;
 2562 }
 2563 
 2564 /* dbnomics function in separate file */
 2565 
 2566 #include "dbnread.c"
 2567 
 2568 /* called from loop in db_get_series() */
 2569 
 2570 static int get_one_db_series (const char *sername,
 2571                   const char *altname,
 2572                   DATASET *dset,
 2573                   CompactMethod cmethod,
 2574                   int interpolate,
 2575                   const char *idxname,
 2576                   PRN *prn)
 2577 {
 2578     CompactMethod this_method = cmethod;
 2579     const char *impname;
 2580     SERIESINFO sinfo; /* sinfo declared */
 2581     double **dbZ;
 2582     int v, err = 0;
 2583 
 2584     series_info_init(&sinfo);
 2585 
 2586     /* are we using a specified name for importation? */
 2587     impname = (*altname == '\0')? sername : altname;
 2588 
 2589     /* see if the series is already in the dataset */
 2590     v = series_index(dset, impname);
 2591     if (v < dset->v && cmethod == COMPACT_NONE) {
 2592     this_method = series_get_compact_method(dset, v);
 2593     }
 2594 
 2595 #if DB_DEBUG
 2596     fprintf(stderr, "get_one_db_series: dset->v=%d, v=%d, name='%s'\n",
 2597         dset->v, v, impname);
 2598     fprintf(stderr, "this_var_method = %d\n", this_method);
 2599 #endif
 2600 
 2601     /* find the series information in the database */
 2602     if (saved_db_type == GRETL_DBNOMICS) {
 2603     err = get_dbnomics_series_info(sername, &sinfo);
 2604     } else if (saved_db_type == GRETL_RATS_DB) {
 2605     err = get_rats_series_info(sername, &sinfo);
 2606     } else if (saved_db_type == GRETL_PCGIVE_DB) {
 2607     err = get_pcgive_series_info(sername, &sinfo);
 2608     } else {
 2609     err = get_native_series_info(sername, &sinfo, idxname);
 2610     }
 2611 
 2612     if (err) {
 2613     fprintf(stderr, "get_one_db_series: failed to get series info\n");
 2614     return err;
 2615     }
 2616 
 2617     /* temporary data array */
 2618     dbZ = new_dbZ(sinfo.nobs);
 2619     if (dbZ == NULL) {
 2620     gretl_errmsg_set(_("Out of memory!"));
 2621     return E_ALLOC;
 2622     }
 2623 
 2624 #if DB_DEBUG
 2625     fprintf(stderr, "get_one_db_series: offset=%d, nobs=%d\n",
 2626         sinfo.offset, sinfo.nobs);
 2627 #endif
 2628 
 2629     if (saved_db_type == GRETL_DBNOMICS) {
 2630     err = get_dbnomics_data(saved_db_name, &sinfo, dbZ);
 2631     } else if (saved_db_type == GRETL_RATS_DB) {
 2632     err = get_rats_db_data(saved_db_name, &sinfo, dbZ);
 2633     } else if (saved_db_type == GRETL_PCGIVE_DB) {
 2634     err = get_pcgive_db_data(saved_db_name, &sinfo, dbZ);
 2635 #ifdef USE_CURL
 2636     } else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
 2637     err = get_remote_db_data(saved_db_name, &sinfo, dbZ);
 2638 #endif
 2639     } else {
 2640     err = get_native_db_data(saved_db_name, &sinfo, dbZ);
 2641     }
 2642 
 2643 #if DB_DEBUG
 2644     fprintf(stderr, "get_one_db_series: get_db_data gave %d\n", err);
 2645 #endif
 2646 
 2647     if (err == DB_MISSING_DATA) {
 2648     fprintf(stderr, "There were missing data\n");
 2649     err = 0;
 2650     }
 2651 
 2652 #if DB_DEBUG
 2653     fprintf(stderr, "sinfo.varname='%s', this_method=%d, interpolate=%d\n",
 2654         sinfo.varname, this_method, interpolate);
 2655 #endif
 2656 
 2657     if (!err) {
 2658     if (*altname != '\0') {
 2659         /* switch the recorded name now */
 2660         strcpy(sinfo.varname, altname);
 2661     }
 2662     if (this_method == COMPACT_SPREAD) {
 2663         err = lib_spread_db_data(dbZ, &sinfo, dset, prn);
 2664     } else {
 2665         err = lib_add_db_data(dbZ, &sinfo, dset, this_method,
 2666                   interpolate, v, prn);
 2667     }
 2668     }
 2669 
 2670     series_info_clear(&sinfo);
 2671     free_dbZ(dbZ);
 2672 
 2673     return err;
 2674 }
 2675 
 2676 static int is_glob (const char *s)
 2677 {
 2678     return strchr(s, '*') || strchr(s, '?');
 2679 }
 2680 
 2681 static int process_import_name_option (char *vname)
 2682 {
 2683     const char *s = get_optval_string(DATA, OPT_N);
 2684     int err = 0;
 2685 
 2686     if (s == NULL) {
 2687     err = E_DATA;
 2688     } else {
 2689     err = check_varname(s);
 2690     }
 2691 
 2692     if (!err) {
 2693     strcpy(vname, s);
 2694     }
 2695 
 2696     return err;
 2697 }
 2698 
 2699 /* main function for getting one or more series out of a
 2700    database (including ODBC) via command-line/script
 2701 */
 2702 
 2703 int db_get_series (const char *line, DATASET *dset,
 2704            gretlopt opt, PRN *prn)
 2705 {
 2706     char altname[VNAMELEN] = {0};
 2707     char **vnames = NULL;
 2708     char *idxname = NULL;
 2709     CompactMethod cmethod;
 2710     int i, nnames = 0;
 2711     int from_scratch = 0;
 2712     int interpolate = 0;
 2713     int err = 0;
 2714 
 2715     if (opt & OPT_O) {
 2716     return odbc_get_series(line, dset, opt, prn);
 2717     }
 2718 
 2719     if (opt & OPT_N) {
 2720     /* --name=whatever */
 2721     err = process_import_name_option(altname);
 2722     if (err) {
 2723         return err;
 2724     }
 2725     }
 2726 
 2727 #if DB_DEBUG
 2728     fprintf(stderr, "db_get_series: line='%s', dset=%p\n",
 2729         line, (void *) dset);
 2730     fprintf(stderr, "db_name = '%s'\n", saved_db_name);
 2731 #endif
 2732 
 2733     if (*saved_db_name == '\0') {
 2734     gretl_errmsg_set(_("No database has been opened"));
 2735     return 1;
 2736     }
 2737 
 2738     from_scratch = (dset->n == 0);
 2739 
 2740     if (opt & OPT_C) {
 2741     /* new-style: compaction method supplied as option */
 2742     cmethod = compact_method_from_option(&err);
 2743     } else {
 2744     /* legacy */
 2745     line = get_compact_method_and_advance(line, &cmethod);
 2746     }
 2747 
 2748     if (!err) {
 2749     if (string_is_blank(line)) {
 2750         err = E_DATA;
 2751     } else {
 2752         /* get the variable names on the line */
 2753         vnames = gretl_string_split(line, &nnames, NULL);
 2754         if (vnames == NULL) {
 2755         err = E_ALLOC;
 2756         }
 2757     }
 2758     }
 2759 
 2760     if (!err && nnames > 1 && *altname != '\0') {
 2761     /* altname only works for a single series? */
 2762     err = E_BADOPT;
 2763     }
 2764 
 2765     if (!err) {
 2766     if (saved_db_type == GRETL_NATIVE_DB) {
 2767         idxname = native_db_index_name();
 2768     } else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
 2769 #ifdef USE_CURL
 2770         idxname = g_strdup_printf("%sdbtmp.idx", gretl_dotdir());
 2771         err = remote_db_index_to_file(idxname);
 2772 #endif
 2773     }
 2774     }
 2775 
 2776     if (!err && (opt & OPT_I)) {
 2777     interpolate = 1;
 2778     }
 2779 
 2780     /* now process the imports individually */
 2781 
 2782     for (i=0; i<nnames && !err; i++) {
 2783     if (is_glob(vnames[i])) {
 2784         /* globbing works only for native databases */
 2785         if (*altname != '\0') {
 2786         /* can't do it */
 2787         err = E_BADOPT;
 2788         } else if (saved_db_type == GRETL_NATIVE_DB ||
 2789                saved_db_type == GRETL_NATIVE_DB_WWW) {
 2790         char **tmp;
 2791         int j, nmatch;
 2792 
 2793         tmp = native_db_match_series(vnames[i], &nmatch,
 2794                          idxname, &err);
 2795         for (j=0; j<nmatch && !err; j++) {
 2796             err = get_one_db_series(tmp[j], altname, dset,
 2797                         cmethod, interpolate,
 2798                         idxname, prn);
 2799         }
 2800         strings_array_free(tmp, nmatch);
 2801         } else {
 2802         err = E_INVARG;
 2803         }
 2804     } else {
 2805         err = get_one_db_series(vnames[i], altname, dset,
 2806                     cmethod, interpolate,
 2807                     idxname, prn);
 2808     }
 2809     }
 2810 
 2811     strings_array_free(vnames, nnames);
 2812 
 2813     if (!err && !(opt & OPT_Q) && gretl_messages_on()) {
 2814     pprintf(prn, _("Series imported OK"));
 2815     pputc(prn, '\n');
 2816     if (from_scratch) {
 2817         print_smpl(dset, 0, OPT_NONE, prn);
 2818     }
 2819     }
 2820 
 2821     if (idxname != NULL) {
 2822     if (saved_db_type == GRETL_NATIVE_DB_WWW) {
 2823         /* this file is a temporary download */
 2824         gretl_remove(idxname);
 2825     }
 2826     free(idxname);
 2827     }
 2828 
 2829     return err;
 2830 }
 2831 
 2832 static FILE *tempfile_open (char *fname, int *err)
 2833 {
 2834     FILE *fp;
 2835 
 2836     strcat(fname, ".XXXXXX");
 2837     fp = gretl_mktemp(fname, "w+");
 2838     if (fp == NULL) {
 2839     *err = E_FOPEN;
 2840     }
 2841 
 2842     return fp;
 2843 }
 2844 
 2845 static void maybe_fclose (FILE *fp)
 2846 {
 2847     if (fp != NULL) {
 2848     fclose(fp);
 2849     }
 2850 }
 2851 
 2852 #define DBUFLEN 1024
 2853 
 2854 static int db_delete_series (const char *line, const int *list,
 2855                  const char *fname, PRN *prn)
 2856 {
 2857     dbnumber buf[DBUFLEN];
 2858     char src1[FILENAME_MAX];
 2859     char src2[FILENAME_MAX];
 2860     char tmp1[FILENAME_MAX];
 2861     char tmp2[FILENAME_MAX];
 2862     char series[VNAMELEN];
 2863     char *p, s[512];
 2864     char **snames = NULL;
 2865     FILE *fidx = NULL, *fbin = NULL;
 2866     FILE *f1 = NULL, *f2 = NULL;
 2867     int i, j, k, print, n, ns;
 2868     int ndel = 0;
 2869     int err = 0;
 2870 
 2871     if (fname == NULL) {
 2872     if (*saved_db_name == '\0') {
 2873         gretl_errmsg_set(_("No database has been opened"));
 2874         err = 1;
 2875     } else if (saved_db_type != GRETL_NATIVE_DB) {
 2876         gretl_errmsg_set("This only works for gretl databases");
 2877         err = 1;
 2878     } else {
 2879         err = open_native_db_files(saved_db_name, &fidx, src1, &fbin, src2);
 2880     }
 2881     } else {
 2882     err = open_native_db_files(fname, &fidx, src1, &fbin, src2);
 2883     }
 2884 
 2885     if (err) {
 2886     return err;
 2887     }
 2888 
 2889     strcpy(tmp1, gretl_dotdir());
 2890     strcat(tmp1, "tmpidx");
 2891     f1 = tempfile_open(tmp1, &err);
 2892     if (err) {
 2893     goto bailout;
 2894     }
 2895 
 2896     strcpy(tmp2, gretl_dotdir());
 2897     strcat(tmp2, "tmpbin");
 2898     f2 = tempfile_open(tmp2, &err);
 2899     if (err) {
 2900     goto bailout;
 2901     }
 2902 
 2903     if (line != NULL) {
 2904     /* extract the variable names given on the line */
 2905     ns = 0;
 2906     while ((line = get_word_and_advance(line, series, VNAMELEN-1))
 2907            && !err) {
 2908         err = strings_array_add(&snames, &ns, series);
 2909     }
 2910     if (!err && ns == 0) {
 2911         fprintf(stderr, "Found no series names\n");
 2912         err = E_PARSE;
 2913     }
 2914     }
 2915 
 2916     print = k = 1;
 2917     i = j = 0;
 2918 
 2919     while (fgets(s, sizeof s, fidx) && !err) {
 2920     if (i == 0) {
 2921         /* always reprint the header */
 2922         fputs(s, f1);
 2923         i++;
 2924         continue;
 2925     }
 2926 
 2927     if (i % 2 != 0) {
 2928         /* odd lines contain varnames */
 2929         print = 1;
 2930         if (snames != NULL) {
 2931         sscanf(s, "%s", series);
 2932         for (j=0; j<ns; j++) {
 2933             if (!strcmp(series, snames[j])) {
 2934             print = 0;
 2935             ndel++;
 2936             break;
 2937             }
 2938         }
 2939         } else {
 2940         if (k <= list[0] && list[k] == j) {
 2941             k++;
 2942             print = 0;
 2943             ndel++;
 2944         }
 2945         j++;
 2946         }
 2947         if (print) {
 2948         fputs(s, f1);
 2949         }
 2950     } else {
 2951         /* even lines have obs information */
 2952         p = strstr(s, "n = ");
 2953         if (p != NULL) {
 2954         sscanf(p + 4, "%d", &n);
 2955         } else {
 2956         err = E_DATA;
 2957         fprintf(stderr, "couldn't find obs for series\n");
 2958         }
 2959 
 2960         if (!print) {
 2961         fseek(fbin, n * sizeof(dbnumber), SEEK_CUR);
 2962         } else {
 2963         int get, got, rem = n;
 2964 
 2965         fputs(s, f1);
 2966 
 2967         while (rem > 0 && !err) {
 2968             get = (rem > DBUFLEN)? DBUFLEN : rem;
 2969             got = fread(buf, sizeof(dbnumber), get, fbin);
 2970             if (got != get) {
 2971             fprintf(stderr, "error reading binary data\n");
 2972             err = E_DATA;
 2973             } else {
 2974             fwrite(buf, sizeof(dbnumber), got, f2);
 2975             rem -= got;
 2976             }
 2977         }
 2978         }
 2979     }
 2980     i++;
 2981     }
 2982 
 2983     if (snames != NULL) {
 2984     strings_array_free(snames, ns);
 2985     }
 2986 
 2987  bailout:
 2988 
 2989     maybe_fclose(fidx);
 2990     maybe_fclose(fbin);
 2991     maybe_fclose(f1);
 2992     maybe_fclose(f2);
 2993 
 2994     if (!err && ndel > 0) {
 2995     err = gretl_rename(tmp1, src1);
 2996     if (!err) {
 2997         err = gretl_rename(tmp2, src2);
 2998     }
 2999     } else {
 3000     gretl_remove(tmp1);
 3001     gretl_remove(tmp2);
 3002     }
 3003 
 3004     if (!err && prn != NULL) {
 3005     pprintf(prn, "Deleted %d series from %s\n", ndel, src2);
 3006     }
 3007 
 3008     return err;
 3009 }
 3010 
 3011 int db_delete_series_by_name (const char *line, PRN *prn)
 3012 {
 3013     return db_delete_series(line, NULL, NULL, prn);
 3014 }
 3015 
 3016 int db_delete_series_by_number (const int *list, const char *fname)
 3017 {
 3018     return db_delete_series(NULL, list, fname, NULL);
 3019 }
 3020 
 3021 static void obs_to_ymd (const char *obs, int pd, int *y, int *m, int *d)
 3022 {
 3023     *y = atoi(obs);
 3024     *d = 1;
 3025 
 3026     if (pd == 12) {
 3027     *m = atoi(obs + 5);
 3028     } else if (pd == 4) {
 3029     int q = atoi(obs + 5);
 3030 
 3031     *m = q * 3 - 2;
 3032     } else {
 3033     *m = 1;
 3034     }
 3035 }
 3036 
 3037 int db_range_check (int db_pd,
 3038             const char *db_stobs,
 3039             const char *db_endobs,
 3040             const char *varname,
 3041             DATASET *dset)
 3042 {
 3043     double sd0_orig, sdn_orig, sd0, sdn;
 3044     int err = 0;
 3045 
 3046     sd0 = get_date_x(db_pd, db_stobs);
 3047     sdn = get_date_x(db_pd, db_endobs);
 3048 
 3049     if (db_pd >= 5 && db_pd <= 7 && !dated_daily_data(dset)) {
 3050     /* convert 'orig' info to daily dates */
 3051     int y, m, d;
 3052 
 3053     obs_to_ymd(dset->stobs, dset->pd, &y, &m, &d);
 3054     sd0_orig = epoch_day_from_ymd(y, m, d);
 3055     obs_to_ymd(dset->endobs, dset->pd, &y, &m, &d);
 3056     sdn_orig = epoch_day_from_ymd(y, m, d);
 3057     } else {
 3058     sd0_orig = dset->sd0;
 3059     sdn_orig = get_date_x(dset->pd, dset->endobs);
 3060     }
 3061 
 3062     if (sd0 > sdn_orig || sdn < sd0_orig) {
 3063     gretl_errmsg_sprintf(_("%s: observation range does not overlap\n"
 3064                    "with the working data set"),
 3065                  varname);
 3066     err = 1;
 3067     }
 3068 
 3069     return err;
 3070 }
 3071 
 3072 int check_db_import_conversion (int pd, DATASET *dset)
 3073 {
 3074     int target = dset->pd;
 3075     int err = 0;
 3076 
 3077     if (pd == target) {
 3078     ; /* no conversion needed */
 3079     } else if (pd == 1 && target == 4) {
 3080     ; /* annual to quarterly expansion */
 3081     } else if (pd == 1 && target == 12) {
 3082     ; /* annual to monthly expansion */
 3083     } else if (pd == 4 && target == 12) {
 3084     ; /* quarterly to monthly expansion */
 3085     } else if (pd == 12 && target == 1) {
 3086     ; /* monthly to annual compaction */
 3087     } else if (pd == 4 && target == 1) {
 3088     ; /* quarterly to annual compaction */
 3089     } else if (pd == 12 && target == 4) {
 3090     ; /* monthly to quarterly compaction */
 3091     } else {
 3092     fprintf(stderr, "db import fail: pd = %d, target %d\n", pd, target);
 3093     err = E_DATA;
 3094     }
 3095 
 3096     return err;
 3097 }
 3098 
 3099 static int check_db_import_full (int pd,
 3100                  const char *stobs,
 3101                  const char *endobs,
 3102                  const char *varname,
 3103                  DATASET *dset)
 3104 {
 3105     int err = check_db_import_conversion(pd, dset);
 3106 
 3107     if (err) {
 3108     gretl_errmsg_sprintf(_("%s: can't handle conversion"),
 3109                  varname);
 3110     } else {
 3111     err = db_range_check(pd, stobs, endobs, varname, dset);
 3112     }
 3113 
 3114 #if DB_DEBUG
 3115     if (err) {
 3116     fprintf(stderr, "check_db_import_full: err = %d\n", err);
 3117     fprintf(stderr, "(dset->n = %d)\n", dset->n);
 3118     }
 3119 #endif
 3120 
 3121     return err;
 3122 }
 3123 
 3124 /* We'll do "spread" compaction for monthly to quarterly or annual,
 3125    quarterly to annual, or daily to monthly or quarterly. Other
 3126    cases are rejected.
 3127 */
 3128 
 3129 static int compact_spread_pd_check (int high, int low)
 3130 {
 3131     if ((low == 12 || low == 4) &&
 3132     (high == 5 || high == 6 || high == 7)) {
 3133     /* daily to monthly or quarterly */
 3134     return 0;
 3135     }
 3136 
 3137     if (!(high == 12 && low == 1) &&
 3138     !(high == 12 && low == 4) &&
 3139     !(high == 4 && low == 1)) {
 3140     gretl_errmsg_set("Unsupported conversion");
 3141     return E_DATA;
 3142     }
 3143 
 3144     return 0;
 3145 }
 3146 
 3147 static void
 3148 init_datainfo_from_sinfo (DATASET *dset, SERIESINFO *sinfo)
 3149 {
 3150     dset->pd = sinfo->pd;
 3151 
 3152     strcpy(dset->stobs, sinfo->stobs);
 3153     strcpy(dset->endobs, sinfo->endobs);
 3154     colonize_obs(dset->stobs);
 3155     colonize_obs(dset->endobs);
 3156 
 3157     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 3158     dset->n = sinfo->nobs;
 3159     dset->v = 2;
 3160 
 3161     dset->t1 = 0;
 3162     dset->t2 = dset->n - 1;
 3163 }
 3164 
 3165 /* construct a little dataset as a temporary wrapper for an
 3166    import using compact=spread
 3167 */
 3168 
 3169 static DATASET *make_import_tmpset (const DATASET *dset,
 3170                     SERIESINFO *sinfo,
 3171                     double **dbZ,
 3172                     int *err)
 3173 {
 3174     DATASET *tmpset = NULL;
 3175 
 3176     *err = compact_spread_pd_check(sinfo->pd, dset->pd);
 3177     if (*err) {
 3178     return NULL;
 3179     }
 3180 
 3181     tmpset = datainfo_new();
 3182     if (tmpset == NULL) {
 3183     *err = E_ALLOC;
 3184     return NULL;
 3185     }
 3186 
 3187     tmpset->v = 2;
 3188     tmpset->n = sinfo->nobs;
 3189 
 3190     tmpset->Z = malloc(2 * sizeof *tmpset->Z);
 3191     if (tmpset->Z == NULL) {
 3192     *err = E_ALLOC;
 3193     free(tmpset);
 3194     return NULL;
 3195     }
 3196 
 3197     *err = dataset_allocate_varnames(tmpset);
 3198     if (*err) {
 3199     free(tmpset->Z[1]);
 3200     free(tmpset->Z);
 3201     free(tmpset);
 3202     return NULL;
 3203     }
 3204 
 3205     tmpset->Z[0] = NULL;
 3206     tmpset->Z[1] = dbZ[1];
 3207     dbZ[1] = NULL; /* note: stolen! */
 3208 
 3209     tmpset->t1 = sinfo->t1;
 3210     tmpset->t2 = sinfo->t2;
 3211     tmpset->pd = sinfo->pd;
 3212     strcpy(tmpset->stobs, sinfo->stobs);
 3213     strcpy(tmpset->endobs, sinfo->endobs);
 3214     tmpset->structure = TIME_SERIES;
 3215     tmpset->sd0 = get_date_x(tmpset->pd, tmpset->stobs);
 3216 
 3217     strcpy(tmpset->varname[1], sinfo->varname);
 3218 
 3219 #if 0
 3220     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
 3221     fprintf(stderr, "import_tmpset: t1=%d, t2=%d, nobs=%d, pd=%d, offset=%d\n",
 3222         sinfo->t1, sinfo->t2, sinfo->nobs, sinfo->pd, sinfo->offset);
 3223     printdata(NULL, NULL, tmpset, OPT_O, prn);
 3224     gretl_print_destroy(prn);
 3225 #endif
 3226 
 3227     return tmpset;
 3228 }
 3229 
 3230 static int
 3231 real_transcribe_db_data (const char *stobs, int nobs,
 3232              const DATASET *dset, int dbv,
 3233              const double *xvec)
 3234 {
 3235     int t, pad1, pad2;
 3236     int start, stop;
 3237     double x;
 3238 
 3239     pad1 = dateton(stobs, dset);
 3240     pad2 = dset->n - nobs - pad1;
 3241 
 3242     if (pad1 > 0) {
 3243     fprintf(stderr, "Padding at start, %d obs\n", pad1);
 3244     for (t=0; t<pad1; t++) {
 3245         dset->Z[dbv][t] = NADBL;
 3246     }
 3247     start = pad1;
 3248     } else {
 3249     start = 0;
 3250     }
 3251     if (pad2 > 0) {
 3252     int n = dset->n;
 3253 
 3254     fprintf(stderr, "Padding at end, %d obs\n", pad2);
 3255     for (t=n-1; t>=n-1-pad2; t--) {
 3256         dset->Z[dbv][t] = NADBL;
 3257     }
 3258     stop = n - pad2;
 3259     } else {
 3260     stop = dset->n;
 3261     }
 3262 
 3263     fprintf(stderr, "Filling in values from %d to %d\n", start, stop - 1);
 3264     for (t=start; t<stop; t++) {
 3265     x = xvec[t - pad1];
 3266     dset->Z[dbv][t] = (x == DBNA)? NADBL : x;
 3267     }
 3268 
 3269     return 0;
 3270 }
 3271 
 3272 int transcribe_db_data (DATASET *dset, int targv,
 3273             const double *src, int pd,
 3274             int nobs, char *stobs,
 3275             CompactMethod cmethod,
 3276             int interpolate)
 3277 {
 3278     double *xvec = (double *) src;
 3279     int free_xvec = 0;
 3280 
 3281     if (pd != dset->pd) {
 3282     if (pd < dset->pd) {
 3283         /* the series needs to be expanded */
 3284         xvec = expand_db_series(src, pd, &nobs, stobs, dset->pd,
 3285                     interpolate);
 3286     } else {
 3287         /* the series needs to be compacted */
 3288         xvec = compact_db_series(src, pd, &nobs, stobs, dset->pd,
 3289                      cmethod);
 3290     }
 3291     if (xvec == NULL) {
 3292         return E_ALLOC;
 3293     }
 3294     free_xvec = 1;
 3295     }
 3296 
 3297     real_transcribe_db_data(stobs, nobs, dset, targv, xvec);
 3298 
 3299     if (free_xvec) {
 3300     free(xvec);
 3301     }
 3302 
 3303     return 0;
 3304 }
 3305 
 3306 /* Processes a single db series in "spread" mode, meaning
 3307    that multiple series are added to the target dataset,
 3308    @dset. This variant is associated with gretl databases.
 3309 */
 3310 
 3311 int lib_spread_db_data (double **dbZ, SERIESINFO *sinfo,
 3312             DATASET *dset, PRN *prn)
 3313 {
 3314     int err = 0;
 3315 
 3316     if (dset == NULL || dset->v == 0) {
 3317     gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
 3318     err = E_DATA;
 3319     } else {
 3320     DATASET *tmpset = make_import_tmpset(dset, sinfo, dbZ, &err);
 3321 
 3322     if (!err) {
 3323         err = do_compact_spread(tmpset, dset->pd);
 3324     }
 3325     if (!err) {
 3326         err = merge_or_replace_data(dset, &tmpset, OPT_X | OPT_U, prn);
 3327     }
 3328     }
 3329 
 3330     return err;
 3331 }
 3332 
 3333 /* Processes a single db series in "spread" mode, meaning
 3334    that multiple series are added to the target dataset,
 3335    @dset. This variant is associated with dbnomics import.
 3336 */
 3337 
 3338 int lib_spread_dbnomics_data (DATASET *dset, DATASET *dbset,
 3339                   PRN *prn)
 3340 {
 3341     int err = 0;
 3342 
 3343     if (dset == NULL || dset->v == 0) {
 3344     gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
 3345     err = E_DATA;
 3346     } else {
 3347     err = do_compact_spread(dbset, dset->pd);
 3348     if (!err) {
 3349         /* we add OPT_K ("keep") to prevent destruction of @dbset:
 3350            we're bypassing get_merge_opts(), so we'd better know
 3351            what we're doing!
 3352         */
 3353         gretlopt merge_opt = (OPT_X | OPT_U | OPT_K);
 3354 
 3355         err = merge_or_replace_data(dset, &dbset, merge_opt, prn);
 3356     }
 3357     }
 3358 
 3359     return err;
 3360 }
 3361 
 3362 /* Processes a single db series, adding it to @dset if
 3363    possible (perhaps after compaction or expansion).
 3364 */
 3365 
 3366 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
 3367                 DATASET *dset, CompactMethod cmethod,
 3368                 int interpolate, int dbv, PRN *prn)
 3369 {
 3370     int new = (dbv == dset->v);
 3371     int err = 0;
 3372 
 3373     if (sinfo == NULL && dbZ == NULL) {
 3374     fprintf(stderr, "lib_add_db_data: broken call!\n");
 3375     return E_DATA;
 3376     }
 3377 
 3378     if (cmethod == COMPACT_NONE) {
 3379     /* impose default if need be */
 3380     cmethod = COMPACT_AVG;
 3381     }
 3382 
 3383     if (dset->n == 0) {
 3384     /* if the existing dataset is empty, initialize it
 3385        using info from the database series
 3386     */
 3387     init_datainfo_from_sinfo(dset, sinfo);
 3388     dset->v = 0; /* trigger for creating data array below */
 3389     if (dset->pd != 1 || strcmp(dset->stobs, "1")) {
 3390         dset->structure = TIME_SERIES;
 3391     }
 3392     } else {
 3393     err = check_db_import_full(sinfo->pd, sinfo->stobs, sinfo->endobs,
 3394                    sinfo->varname, dset);
 3395     if (err) {
 3396         return err;
 3397     }
 3398     }
 3399 
 3400     if (dset->v == 0) {
 3401     /* the data array is still empty */
 3402     dset->v = 2;
 3403     dbv = 1;
 3404     if (start_new_Z(dset, 0)) {
 3405         return E_ALLOC;
 3406     }
 3407     } else if (new && dataset_add_series(dset, 1)) {
 3408     return E_ALLOC;
 3409     }
 3410 
 3411 #if DB_DEBUG
 3412     fprintf(stderr, "dset->Z=%p\n", (void *) dset->Z);
 3413     fprintf(stderr, "dset->n = %d, dset->v = %d, dbv = %d\n",
 3414         dset->n, dset->v, dbv);
 3415 #endif
 3416 
 3417     err = transcribe_db_data(dset, dbv, dbZ[1], sinfo->pd, sinfo->nobs,
 3418                  sinfo->stobs, cmethod, interpolate);
 3419 
 3420     if (!err) {
 3421     /* common stuff for adding a var */
 3422     strcpy(dset->varname[dbv], sinfo->varname);
 3423     series_set_label(dset, dbv, sinfo->descrip);
 3424     series_set_compact_method(dset, dbv, cmethod);
 3425     } else if (new) {
 3426     /* we added a series that has not been filled */
 3427     dataset_drop_last_variables(dset, 1);
 3428     }
 3429 
 3430     return err;
 3431 }
 3432 
 3433 /* compact an individual series, in the context of converting an
 3434    entire working dataset to a lower frequency: used in all cases
 3435    except conversion from daily to monthly
 3436 */
 3437 
 3438 static double *compact_series (const DATASET *dset, int i, int oldn,
 3439                    int startskip, int min_startskip,
 3440                    int compfac, CompactMethod method)
 3441 {
 3442     const double *src = dset->Z[i];
 3443     double *x;
 3444     int lead = startskip - min_startskip;
 3445     int to_weekly = (compfac >= 5 && compfac <= 7);
 3446     int t, idx;
 3447 
 3448 #if DB_DEBUG
 3449     fprintf(stderr, "compact_series: startskip=%d, min_startskip=%d, compfac=%d "
 3450         "lead=%d\n", startskip, min_startskip, compfac, lead);
 3451 #endif
 3452 
 3453     x = malloc(dset->n * sizeof *x);
 3454     if (x == NULL) {
 3455     return NULL;
 3456     }
 3457 
 3458     for (t=0; t<dset->n; t++) {
 3459     x[t] = NADBL;
 3460     }
 3461 
 3462     idx = startskip;
 3463 
 3464     for (t=lead; t<dset->n && idx<oldn; t++) {
 3465     if (method == COMPACT_SOP) {
 3466         if (to_weekly && na(src[idx]) && idx < oldn - 1) {
 3467         /* allow one day's slack */
 3468         x[t] = src[idx + 1];
 3469         } else {
 3470         x[t] = src[idx];
 3471         }
 3472     } else if (method == COMPACT_EOP) {
 3473         if (to_weekly && na(src[idx]) && idx > startskip) {
 3474         /* one day's slack */
 3475         x[t] = src[idx - 1];
 3476         } else {
 3477         x[t] = src[idx];
 3478         }
 3479     } else if (method == COMPACT_SUM || method == COMPACT_AVG) {
 3480         int j, st, den = compfac;
 3481         int n_ok = 0;
 3482 
 3483         if (idx + compfac - 1 > oldn - 1) {
 3484         break;
 3485         }
 3486 
 3487         x[t] = 0.0;
 3488 
 3489         for (j=0; j<compfac; j++) {
 3490         st = idx + j;
 3491         if (na(src[st])) {
 3492             if (to_weekly) {
 3493             den--;
 3494             } else {
 3495             x[t] = NADBL;
 3496             break;
 3497             }
 3498         } else {
 3499             /* got a valid observation */
 3500             n_ok++;
 3501             x[t] += src[st];
 3502         }
 3503         }
 3504 
 3505         if (n_ok == 0) {
 3506         x[t] = NADBL;
 3507         }
 3508 
 3509         if (method == COMPACT_AVG && !na(x[t])) {
 3510         if (den > 0) {
 3511             x[t] /= den;
 3512         } else {
 3513             x[t] = NADBL;
 3514         }
 3515         }
 3516     }
 3517     idx += compfac;
 3518     }
 3519 
 3520     return x;
 3521 }
 3522 
 3523 /* Determine year and period (either month or quarter,
 3524    depending on the value of @pd) for observation @t in
 3525    daily dataset @dset.
 3526 */
 3527 
 3528 static int daily_yp (const DATASET *dset, int t,
 3529              int pd, int *y, int *p)
 3530 {
 3531     char obs[12];
 3532     int mon, day;
 3533 
 3534     ntodate(obs, t, dset);
 3535 
 3536     if (sscanf(obs, YMD_READ_FMT, y, &mon, &day) != 3) {
 3537     return E_DATA;
 3538     }
 3539 
 3540     if (pd == 12) {
 3541     *p = mon;
 3542     } else {
 3543     /* convert month to quarter */
 3544     *p = 1 + (mon - 1) / 3;
 3545     }
 3546 
 3547     return 0;
 3548 }
 3549 
 3550 #define DAYDBG 0
 3551 
 3552 /* For a single row, @cset_t, of a compacted dataset,
 3553    write daily values into the set of monthly or
 3554    quarterly series that will represent them. The
 3555    daily data are drawn from @dset and transcribed to
 3556    @cset.
 3557 */
 3558 
 3559 static void fill_cset_t (const DATASET *dset,
 3560              int *startday,
 3561              DATASET *cset,
 3562              int cset_t,
 3563              int compfac,
 3564              int qmonth)
 3565 {
 3566     char obs[OBSLEN];
 3567     double cvec[30];
 3568     int idx[30];
 3569     const double *z;
 3570     int y, p, pstart = 0;
 3571     int effn, ndays = 0;
 3572     int i, j, k, s, t, t0;
 3573     double zsum = 0.0;
 3574 
 3575     t0 = *startday;
 3576     y = p = 0;
 3577 
 3578     /* how many daily obs do we have in this month? */
 3579     for (t=t0; t<dset->n; t++) {
 3580     daily_yp(dset, t, 12, &y, &p);
 3581     if (t == t0) {
 3582         pstart = p;
 3583     } else if (p != pstart) {
 3584         break;
 3585     }
 3586     ndays++;
 3587     }
 3588 
 3589 #if 0
 3590     fprintf(stderr, "fill_cset_t: ndays = %d, compfac = %d\n",
 3591         ndays, compfac);
 3592 #endif
 3593 
 3594     /* construct array of month-day indices */
 3595     for (j=0; j<compfac && j<ndays; j++) {
 3596     ntodate(obs, t0 + j, dset);
 3597     idx[j] = date_to_daily_index(obs, dset->pd);
 3598     }
 3599 
 3600     /* the outer loop is over the daily series in the
 3601        source dataset */
 3602 
 3603     k = 1 + qmonth * compfac;
 3604 
 3605     for (i=1; i<dset->v; i++) {
 3606     z = dset->Z[i] + t0;
 3607     for (j=0; j<compfac; j++) {
 3608         cvec[j] = NADBL;
 3609     }
 3610     effn = 0;
 3611     zsum = 0.0;
 3612     for (j=0; j<compfac && j<ndays; j++) {
 3613         s = idx[j];
 3614         cvec[s] = z[j];
 3615         if (!na(cvec[s])) {
 3616         zsum += cvec[s];
 3617         effn++;
 3618         }
 3619     }
 3620     if (effn < compfac) {
 3621         /* we have some padding to do */
 3622         double zbar = zsum / effn;
 3623 
 3624         for (j=0; j<compfac; j++) {
 3625         if (na(cvec[j])) {
 3626             cvec[j] = zbar;
 3627         }
 3628         }
 3629     }
 3630     /* transcribe into target dataset */
 3631     for (j=0; j<compfac; j++) {
 3632         cset->Z[k+j][cset_t] = cvec[j];
 3633     }
 3634 
 3635     k += compfac;
 3636     }
 3637 
 3638     *startday += ndays;
 3639 }
 3640 
 3641 #define SPREAD_DEBUG 0
 3642 
 3643 /* compact daily data to monthly or quarterly using the
 3644    "spread" method */
 3645 
 3646 static DATASET *compact_daily_spread (const DATASET *dset,
 3647                       int newpd,
 3648                       int *nv,
 3649                       int *err)
 3650 {
 3651     const char *periods[] = {
 3652     "month",
 3653     "quarter"
 3654     };
 3655     const char *period;
 3656     DATASET *cset = NULL;
 3657     char label[MAXLABEL];
 3658     int oldpd = dset->pd;
 3659     int compfac;
 3660     int v, i, j, k, t, T;
 3661     int startyr, startper;
 3662     int endyr, endper;
 3663     int startday;
 3664 
 3665     fprintf(stderr, "*** compact_daily_spread (newpd=%d) ***\n", newpd);
 3666 
 3667     daily_yp(dset, 0, newpd, &startyr, &startper);
 3668     daily_yp(dset, dset->n - 1, newpd, &endyr, &endper);
 3669     compfac = midas_days_per_period(dset->pd, newpd);
 3670 
 3671     if (newpd == 12) {
 3672     period = periods[0];
 3673     } else if (newpd == 4) {
 3674     period = periods[1];
 3675     } else {
 3676     *err = E_DATA;
 3677     return NULL;
 3678     }
 3679 
 3680     T = newpd * (endyr - startyr) + (endper - startper + 1);
 3681 
 3682     if (T <= 1) {
 3683     *err = E_DATA;
 3684     return NULL;
 3685     }
 3686 
 3687     /* the number of series, after compaction */
 3688     v = 1 + (dset->v - 1) * compfac;
 3689 
 3690 #if SPREAD_DEBUG
 3691     fprintf(stderr, "oldpd %d, newpd %d, nvars=%d, T=%d, start=%d:%d, end=%d:%d\n",
 3692         dset->pd, newpd, v, T, startyr, startper, endyr, endper);
 3693 #endif
 3694 
 3695     cset = create_new_dataset(v, T, 0);
 3696     if (cset == NULL) {
 3697     *err = E_ALLOC;
 3698     return NULL;
 3699     }
 3700 
 3701     if (newpd == 12) {
 3702     sprintf(cset->stobs, "%d:%02d", startyr, startper);
 3703     sprintf(cset->endobs, "%d:%02d", endyr, endper);
 3704     } else {
 3705     sprintf(cset->stobs, "%d:%d", startyr, startper);
 3706     sprintf(cset->endobs, "%d:%d", endyr, endper);
 3707     }
 3708 
 3709     cset->pd = newpd;
 3710     cset->structure = TIME_SERIES;
 3711     cset->sd0 = get_date_x(cset->pd, cset->stobs);
 3712 
 3713     /* ensure no uninitialized data */
 3714     for (i=1; i<v; i++) {
 3715     for (t=0; t<T; t++) {
 3716         cset->Z[i][t] = NADBL;
 3717     }
 3718     }
 3719 
 3720     /* do the actual data transcription first */
 3721     startday = 0;
 3722     for (t=0; t<T; t++) {
 3723     if (newpd == 4) {
 3724         fill_cset_t(dset, &startday, cset, t, compfac/3, 0);
 3725         fill_cset_t(dset, &startday, cset, t, compfac/3, 1);
 3726         fill_cset_t(dset, &startday, cset, t, compfac/3, 2);
 3727     } else {
 3728         fill_cset_t(dset, &startday, cset, t, compfac, 0);
 3729     }
 3730     }
 3731 
 3732     /* then name the series and reorganize */
 3733 
 3734     k = 1;
 3735     for (i=1; i<dset->v; i++) {
 3736     double *xtmp;
 3737     char sfx[16];
 3738     int p;
 3739 
 3740     /* switch data order */
 3741     for (j=0; j<compfac/2; j++) {
 3742         p = k + compfac - j - 1;
 3743         xtmp = cset->Z[k+j];
 3744         cset->Z[k+j] = cset->Z[p];
 3745         cset->Z[p] = xtmp;
 3746     }
 3747 
 3748     /* names and labels */
 3749     for (j=0; j<compfac; j++) {
 3750         strcpy(cset->varname[k+j], dset->varname[i]);
 3751         gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
 3752         sprintf(sfx, "_d%02d", compfac - j);
 3753         strcat(cset->varname[k+j], sfx);
 3754         sprintf(label, "%s in day %d of %s", dset->varname[i],
 3755             compfac - j, period);
 3756         series_record_label(cset, k+j, label);
 3757         series_set_midas_period(cset, k+j, compfac - j);
 3758         series_set_midas_freq(cset, k+j, oldpd);
 3759         if (j == 0) {
 3760         series_set_midas_anchor(cset, k+j);
 3761         }
 3762     }
 3763 
 3764     /* advance column write position for next source series */
 3765     k += compfac;
 3766     }
 3767 
 3768 #if SPREAD_DEBUG > 1
 3769     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
 3770     printdata(NULL, NULL, cset, OPT_O, prn);
 3771     gretl_print_destroy(prn);
 3772 #endif
 3773 
 3774     return cset;
 3775 }
 3776 
 3777 /* compact an entire dataset, transcribing from each higher-frequency
 3778    series to a set of lower-frequency series, each of which holds the
 3779    observations from a given sub-period
 3780 */
 3781 
 3782 static DATASET *compact_data_spread (const DATASET *dset, int newpd,
 3783                      int startmaj, int startmin,
 3784                      int endmaj, int endmin,
 3785                      int *nv, int *err)
 3786 {
 3787     const char *subper[] = {
 3788     "month",
 3789     "quarter"
 3790     };
 3791     const char *period[] = {
 3792     "year",
 3793     "quarter"
 3794     };
 3795     const char *p0, *p1;
 3796     DATASET *cset = NULL;
 3797     char sfx[16];
 3798     char label[MAXLABEL];
 3799     int oldpd = dset->pd;
 3800     int compfac = oldpd / newpd;
 3801     int v, i, j, k, t, T;
 3802     int q0 = 0, qT = 0;
 3803 
 3804     /* calculate @T, the number of observations that the compacted
 3805        dataset should comprise
 3806     */
 3807     if (newpd == 1) {
 3808     T = endmaj - startmaj + 1;
 3809     } else if (newpd == 4) {
 3810     T = oldpd * (endmaj - startmaj + 1) / compfac;
 3811     q0 = 1 + (startmin - 1) / 3;
 3812     qT = 1 + (endmin - 1) / 3;
 3813     T += qT - q0 - 3;
 3814     } else {
 3815     *err = E_DATA;
 3816     return NULL;
 3817     }
 3818 
 3819     if (T <= 1) {
 3820     *err = E_DATA;
 3821     return NULL;
 3822     }
 3823 
 3824     /* calculate @v, the number of series after compaction */
 3825     v = 1 + (dset->v - 1) * compfac;
 3826 
 3827 #if SPREAD_DEBUG
 3828     fprintf(stderr, "oldpd %d, newpd %d, v=%d, T=%d, start=%d:%d, end=%d:%d\n",
 3829         oldpd, newpd, v, T, startmaj, startmin, endmaj, endmin);
 3830 #endif
 3831 
 3832     cset = create_new_dataset(v, T, 0);
 3833     if (cset == NULL) {
 3834     *err = E_ALLOC;
 3835     return NULL;
 3836     }
 3837 
 3838     if (newpd == 1) {
 3839     sprintf(cset->stobs, "%d", startmaj);
 3840     sprintf(cset->endobs, "%d", endmaj);
 3841     p1 = period[0];
 3842     } else {
 3843     /* newpd must be 4 */
 3844     sprintf(cset->stobs, "%d:%d", startmaj, q0);
 3845     sprintf(cset->endobs, "%d:%d", endmaj, qT);
 3846     p1 = period[1];
 3847     }
 3848 
 3849     p0 = (oldpd == 12)? subper[0] : subper[1];
 3850 
 3851     cset->pd = newpd;
 3852     cset->structure = TIME_SERIES;
 3853     cset->sd0 = get_date_x(cset->pd, cset->stobs);
 3854 
 3855 #if SPREAD_DEBUG
 3856     fprintf(stderr, "stobs '%s', endobs '%s', sd0=%g, q0=%d\n",
 3857         cset->stobs, cset->endobs, cset->sd0, q0);
 3858 #endif
 3859 
 3860     k = 1; /* the first new series */
 3861 
 3862     for (i=1; i<dset->v; i++) {
 3863     /* loop across original data series */
 3864     double *xtmp;
 3865     int offset;
 3866     int p, s = 0;
 3867 
 3868     /* how many initial observations should be set to NA? */
 3869     if (newpd == 1) {
 3870         offset = startmin - 1;
 3871     } else {
 3872         offset = startmin - (1 + (q0 - 1) * compfac);
 3873     }
 3874 
 3875     for (t=0; t<T; t++) {
 3876         /* loop across new time periods */
 3877         for (j=0; j<compfac; j++) {
 3878         /* loop across new series <- sub-periods */
 3879         while (s < offset) {
 3880             cset->Z[k+j][t] = NADBL;
 3881             offset--;
 3882             j++;
 3883         }
 3884         if (s < dset->n) {
 3885             cset->Z[k+j][t] = dset->Z[i][s];
 3886         } else {
 3887             cset->Z[k+j][t] = NADBL;
 3888         }
 3889         s++;
 3890         }
 3891     }
 3892 
 3893     /* reverse the new columns: most recent first */
 3894     for (j=0; j<compfac/2; j++) {
 3895         p = k + compfac - j - 1;
 3896         xtmp = cset->Z[k+j];
 3897         cset->Z[k+j] = cset->Z[p];
 3898         cset->Z[p] = xtmp;
 3899     }
 3900 
 3901     /* names and labels */
 3902     for (j=0; j<compfac; j++) {
 3903         strcpy(cset->varname[k+j], dset->varname[i]);
 3904         if (oldpd == 12 && newpd == 4) {
 3905         gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
 3906         sprintf(sfx, "_m%d", compfac - j);
 3907         } else if (oldpd == 12) {
 3908         /* going to annual */
 3909         gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
 3910         sprintf(sfx, "_m%02d", compfac - j);
 3911         } else {
 3912         gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
 3913         sprintf(sfx, "_q%d", compfac - j);
 3914         }
 3915         strcat(cset->varname[k+j], sfx);
 3916         sprintf(label, "%s in %s %d of %s", dset->varname[i],
 3917             p0, compfac - j, p1);
 3918         series_record_label(cset, k+j, label);
 3919         series_set_midas_period(cset, k+j, compfac - j);
 3920         series_set_midas_freq(cset, k+j, oldpd);
 3921         if (j == 0) {
 3922         series_set_midas_anchor(cset, k+j);
 3923         }
 3924     }
 3925 
 3926     /* advance column write position for next source series */
 3927     k += compfac;
 3928     }
 3929 
 3930 #if SPREAD_DEBUG > 1
 3931     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
 3932     printdata(NULL, NULL, cset, OPT_O, prn);
 3933     gretl_print_destroy(prn);
 3934 #endif
 3935 
 3936     return cset;
 3937 }
 3938 
 3939 /* specific apparatus for converting daily time series to monthly */
 3940 
 3941 static double *extend_series (const double *z, int n)
 3942 {
 3943     double *x = malloc(n * sizeof *x);
 3944 
 3945     if (x != NULL) {
 3946     int t;
 3947 
 3948     x[0] = NADBL;
 3949     for (t=1; t<n; t++) {
 3950         x[t] = z[t-1];
 3951     }
 3952     }
 3953 
 3954     return x;
 3955 }
 3956 
 3957 #define DMDEBUG 0
 3958 
 3959 static double *
 3960 daily_series_to_monthly (DATASET *dset, int i,
 3961              int nm, int yr, int mon, int offset,
 3962              int any_eop, CompactMethod method)
 3963 {
 3964     double *x;
 3965     const double *src = dset->Z[i];
 3966     const double *z;
 3967     double *tmp = NULL;
 3968     int t, sop_t, eop_t;
 3969 
 3970     x = malloc(nm * sizeof *x);
 3971     if (x == NULL) {
 3972     return NULL;
 3973     }
 3974 
 3975     if (offset < 0) {
 3976     tmp = extend_series(src, dset->n + 1);
 3977     if (tmp == NULL) {
 3978         free(x);
 3979         return NULL;
 3980     }
 3981     /* this permits use of a negative offset */
 3982     z = tmp + 1;
 3983     } else {
 3984     z = src;
 3985     }
 3986 
 3987     /* Note: we can't necessarily assume that the first obs
 3988        is the first day of a month. The @offset value gives the
 3989        number of daily observations (allowing for the number of
 3990        observed days in the week) in the first month of the daily
 3991        data, prior to the data actually starting.
 3992     */
 3993 
 3994     /* The "one day's slack" business below, with start-of-period and
 3995        end-of-period compaction is designed to allow for the
 3996        possibility that the first (or last) day of the month may not
 3997        have been a trading day.
 3998     */
 3999 
 4000     /* first obs for start-of-period */
 4001     sop_t = offset;
 4002 
 4003     /* first obs for end-of-period */
 4004     if (sop_t > 0) {
 4005     eop_t = offset - 1;
 4006     } else {
 4007     /* the first obs starts a month */
 4008     eop_t = get_days_in_month(mon, yr, dset->pd, 0) - 1;
 4009     }
 4010 
 4011 #if DMDEBUG
 4012     fprintf(stderr, "starting: offset=%d, any_eop=%d, sop_t=%d, eop_t=%d\n",
 4013         offset, any_eop, sop_t, eop_t);
 4014 #endif
 4015 
 4016     for (t=0; t<nm; t++) {
 4017     /* loop across the months in the compacted data */
 4018     int mdays = get_days_in_month(mon, yr, dset->pd, 0);
 4019 
 4020     if (t > 0) {
 4021         eop_t += mdays;
 4022     }
 4023 
 4024 #if DMDEBUG
 4025     fprintf(stderr, "t=%d: mon=%d, mdays=%d, sop_t=%d, eop_t=%d\n",
 4026         t, mon, mdays, sop_t, eop_t);
 4027 #endif
 4028 
 4029     if (t == 0 && offset > 0 && any_eop && method != COMPACT_EOP) {
 4030         /* we started with an incomplete month: so any
 4031            method other than EOP yields an NA */
 4032         x[t] = NADBL;
 4033     } else if (method == COMPACT_SOP) {
 4034         /* allow one days's slack */
 4035         if (na(z[sop_t]) && sop_t < dset->n - 1) {
 4036         x[t] = z[sop_t + 1];
 4037         } else {
 4038         x[t] = z[sop_t];
 4039         }
 4040     } else if (method == COMPACT_EOP) {
 4041         if (eop_t >= dset->n) {
 4042         x[t] = NADBL;
 4043         } else {
 4044         /* allow one days's slack */
 4045         if (na(z[eop_t]) && eop_t > 0) {
 4046             x[t] = z[eop_t - 1];
 4047         } else {
 4048             x[t] = z[eop_t];
 4049         }
 4050         }
 4051     } else if (method == COMPACT_SUM ||
 4052            method == COMPACT_AVG) {
 4053         int j, dayt, den = mdays;
 4054         int n_ok = 0;
 4055 
 4056         x[t] = 0.0;
 4057 
 4058         for (j=0; j<mdays; j++) {
 4059         dayt = sop_t + j;
 4060         if (dayt >= dset->n) {
 4061             x[t] = NADBL;
 4062             break;
 4063         } else if (na(z[dayt])) {
 4064             if (method == COMPACT_AVG) {
 4065             den--;
 4066             }
 4067         } else {
 4068             /* got a valid observation */
 4069             x[t] += z[dayt];
 4070             n_ok++;
 4071         }
 4072         }
 4073 
 4074         if (n_ok == 0) {
 4075         x[t] = NADBL;
 4076         }
 4077 
 4078         if (method == COMPACT_AVG && !na(x[t])) {
 4079         if (den > 0) {
 4080             x[t] /= (double) den;
 4081         } else {
 4082             x[t] = NADBL;
 4083         }
 4084         }
 4085     }
 4086 
 4087     sop_t += mdays;
 4088 
 4089     if (mon == 12) {
 4090         mon = 1;
 4091         yr++;
 4092     } else {
 4093         mon++;
 4094     }
 4095     }
 4096 
 4097     if (tmp != NULL) {
 4098     free(tmp);
 4099     }
 4100 
 4101     return x;
 4102 }
 4103 
 4104 static void
 4105 get_startskip_etc (int compfac, int startmin, int endmin,
 4106            int oldn, CompactMethod method,
 4107            int *startskip, int *newn)
 4108 {
 4109     int ss = 0, n = 0;
 4110 
 4111     if (method == COMPACT_EOP) {
 4112     int unused;
 4113 
 4114     ss = (compfac - (startmin % compfac)) % compfac;
 4115     n = oldn / compfac;
 4116     unused = oldn - 1 - ss - (n-1) * compfac;
 4117     if (unused >= compfac) {
 4118         n++;
 4119     }
 4120     } else if (method == COMPACT_SOP) {
 4121     int unused;
 4122 
 4123     ss = (compfac - (startmin % compfac) + 1) % compfac;
 4124     n = oldn / compfac;
 4125     unused = oldn - 1 - ss - (n-1) * compfac;
 4126     if (unused >= compfac) {
 4127         n++;
 4128     }
 4129     } else {
 4130     int es = endmin % compfac;
 4131 
 4132     ss = (compfac - (startmin % compfac) + 1) % compfac;
 4133     n = (oldn - ss - es) / compfac;
 4134     }
 4135 
 4136     *startskip = ss;
 4137     *newn = n;
 4138 }
 4139 
 4140 /* specific to compaction of daily time series */
 4141 
 4142 static void
 4143 get_daily_compact_params (CompactMethod default_method,
 4144               int *any_eop, int *any_sop,
 4145               int *all_same,
 4146               const DATASET *dset)
 4147 {
 4148     int i, n_not_eop = 0, n_not_sop = 0;
 4149 
 4150     *all_same = 1;
 4151     *any_eop = (default_method == COMPACT_EOP)? 1 : 0;
 4152     *any_sop = (default_method == COMPACT_SOP)? 1 : 0;
 4153 
 4154     for (i=1; i<dset->v; i++) {
 4155     CompactMethod method = series_get_compact_method(dset, i);
 4156 
 4157     if (method != default_method && method != COMPACT_NONE) {
 4158         *all_same = 0;
 4159         if (method == COMPACT_EOP) {
 4160         *any_eop = 1;
 4161         } else {
 4162         n_not_eop++;
 4163         }
 4164         if (method == COMPACT_SOP) {
 4165         *any_sop = 1;
 4166         } else {
 4167         n_not_sop++;
 4168         }
 4169     }
 4170     }
 4171 
 4172     if (n_not_eop == dset->v - 1) {
 4173     *any_eop = 0;
 4174     }
 4175 
 4176     if (n_not_sop == dset->v - 1) {
 4177     *any_sop = 0;
 4178     }
 4179 }
 4180 
 4181 /* specific to non-daily time series (monthly or quarterly) */
 4182 
 4183 static void
 4184 get_global_compact_params (int compfac, int startmin, int endmin,
 4185                CompactMethod default_method,
 4186                int *min_startskip, int *max_n,
 4187                int *any_eop, int *all_same,
 4188                DATASET *dset)
 4189 {
 4190     CompactMethod method;
 4191     int i, startskip, n;
 4192     int n_not_eop = 0;
 4193 
 4194     for (i=0; i<dset->v; i++) {
 4195     if (i == 0) {
 4196         get_startskip_etc(compfac, startmin, endmin, dset->n,
 4197                   default_method, &startskip, &n);
 4198         if (default_method == COMPACT_EOP) {
 4199         *any_eop = 1;
 4200         }
 4201     } else {
 4202         method = series_get_compact_method(dset, i);
 4203         if (method != default_method && method != COMPACT_NONE) {
 4204         get_startskip_etc(compfac, startmin, endmin, dset->n,
 4205                   method, &startskip, &n);
 4206         *all_same = 0;
 4207         if (method == COMPACT_EOP) {
 4208             *any_eop = 1;
 4209         } else {
 4210             n_not_eop++;
 4211         }
 4212         }
 4213     }
 4214     if (startskip < *min_startskip) {
 4215         *min_startskip = startskip;
 4216     }
 4217     if (n > *max_n) {
 4218         *max_n = n;
 4219     }
 4220     }
 4221 
 4222     if (n_not_eop == dset->v - 1) {
 4223     *any_eop = 0;
 4224     }
 4225 }
 4226 
 4227 static int get_obs_maj_min (const char *obs, int *maj, int *min)
 4228 {
 4229     int np = sscanf(obs, "%d:%d", maj, min);
 4230 
 4231     if (np < 2) {
 4232     np = sscanf(obs, "%d.%d", maj, min);
 4233     }
 4234 
 4235     return (np == 2);
 4236 }
 4237 
 4238 /* for daily data, figure the number of observations to
 4239    be skipped at the start of each series
 4240 */
 4241 
 4242 static int get_daily_offset (const DATASET *dset,
 4243                  int y, int m, int d,
 4244                  int skip, int any_eop)
 4245 {
 4246     int ret = 0;
 4247 
 4248     if (skip) {
 4249     /* moving to start of next month: offset = no. of
 4250        observations in the first month */
 4251     ret = days_in_month_after(y, m, d, dset->pd) + 1;
 4252     } else if (any_eop && !day_starts_month(d, m, y, dset->pd, NULL)) {
 4253     ret = days_in_month_after(y, m, d, dset->pd) + 1;
 4254     } else {
 4255     /* offset = no. of obs missing at start of first month */
 4256     ret = days_in_month_before(y, m, d, dset->pd);
 4257 #if DMDEBUG
 4258     fprintf(stderr, "days_in_month_before %d-%02d-%02d = %d "
 4259         "for pd=%d\n", y, m, d, ret, dset->pd);
 4260 #endif
 4261     }
 4262 
 4263     return ret;
 4264 }
 4265 
 4266 /* for daily data, figure the number of valid monthly
 4267    observations that can be constructed by compaction
 4268 */
 4269 
 4270 static int get_n_ok_months (const DATASET *dset,
 4271                 CompactMethod default_method,
 4272                 int *startyr, int *startmon,
 4273                 int *endyr, int *endmon,
 4274                 int *offset, int *p_any_eop)
 4275 {
 4276     int y1, m1, d1, y2, m2, d2;
 4277     int any_eop, any_sop, all_same;
 4278     int skip = 0, pad = 0, nm = -1;
 4279 
 4280     if (sscanf(dset->stobs, YMD_READ_FMT, &y1, &m1, &d1) != 3) {
 4281     return -1;
 4282     }
 4283     if (sscanf(dset->endobs, YMD_READ_FMT, &y2, &m2, &d2) != 3) {
 4284     return -1;
 4285     }
 4286 
 4287     if (y1 < 100) {
 4288     y1 = FOUR_DIGIT_YEAR(y1);
 4289     }
 4290     if (y2 < 100) {
 4291     y2 = FOUR_DIGIT_YEAR(y2);
 4292     }
 4293 
 4294     nm = 12 * (y2 - y1) + m2 - m1 + 1;
 4295 
 4296     get_daily_compact_params(default_method, &any_eop, &any_sop,
 4297                  &all_same, dset);
 4298 
 4299     *startyr = y1;
 4300     *startmon = m1;
 4301     *endyr = y2;
 4302     *endmon = m2;
 4303 
 4304 #if DMDEBUG
 4305     fprintf(stderr, "get_n_ok_months: any_sop=%d, any_eop=%d, "
 4306         "all_same=%d\n", any_sop, any_eop, all_same);
 4307     fprintf(stderr, "y1=%d m1=%d d1=%d; y2=%d m2=%d d2=%d\n",
 4308         y1, m1, d1, y2, m2, d2);
 4309 #endif
 4310 
 4311     if (!day_starts_month(d1, m1, y1, dset->pd, &pad) && !any_eop) {
 4312     if (*startmon == 12) {
 4313         *startmon = 1;
 4314         *startyr += 1;
 4315     } else {
 4316         *startmon += 1;
 4317     }
 4318     skip = 1;
 4319     nm--;
 4320     }
 4321 
 4322     if (!day_ends_month(d2, m2, y2, dset->pd) && !any_sop) {
 4323     if (*endmon == 1) {
 4324         *endmon = 12;
 4325         *endyr -= 1;
 4326     } else {
 4327         *endmon -= 1;
 4328     }
 4329     nm--;
 4330     }
 4331 
 4332 #if DMDEBUG
 4333     fprintf(stderr, "after adjustment: range %d:%02d to %d:%02d, "
 4334         "pad=%d, skip=%d\n", *startyr, *startmon, *endyr, *endmon,
 4335         pad, skip);
 4336 #endif
 4337 
 4338     if (pad) {
 4339     *offset = -1;
 4340     } else {
 4341     *offset = get_daily_offset(dset, y1, m1, d1, skip, any_eop);
 4342     }
 4343 
 4344     *p_any_eop = any_eop;
 4345 
 4346     return nm;
 4347 }
 4348 
 4349 #define WEEKLY_DEBUG 0
 4350 
 4351 static int
 4352 weeks_to_months_exec (double **mZ, const DATASET *dset,
 4353               CompactMethod method)
 4354 {
 4355     char obsstr[OBSLEN];
 4356     int *mn = NULL;
 4357     int yr, mon, day;
 4358     int monbak = 0;
 4359     int i, s, t = 0;
 4360     int err = 0;
 4361 
 4362     mn = malloc(dset->v * sizeof *mn);
 4363     if (mn == NULL) {
 4364     return E_ALLOC;
 4365     }
 4366 
 4367     for (i=1; i<dset->v; i++) {
 4368     /* initialize all series, first obs */
 4369     mZ[i][0] = NADBL;
 4370     mn[i] = 0;
 4371     }
 4372 
 4373     for (s=0; s<dset->n; s++) {
 4374     /* loop across the weekly obs in this month */
 4375     ntodate(obsstr, s, dset);
 4376     sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day);
 4377     if (monbak > 0 && mon != monbak) {
 4378         /* new month: finalize the previous one */
 4379         for (i=1; i<dset->v; i++) {
 4380         if (method == COMPACT_EOP) {
 4381             if (s > 0) {
 4382             mZ[i][t] = dset->Z[i][s-1];
 4383             }
 4384         } else if (method == COMPACT_AVG) {
 4385             if (mn[i] > 0) {
 4386             mZ[i][t] /= (double) mn[i];
 4387             }
 4388         }
 4389         }
 4390         /* and start another? */
 4391         if (s < dset->n - 1) {
 4392         t++;
 4393         for (i=1; i<dset->v; i++) {
 4394             /* initialize all series, current obs */
 4395             if (method == COMPACT_SOP) {
 4396             mZ[i][t] = dset->Z[i][s];
 4397             } else {
 4398             mZ[i][t] = NADBL;
 4399             }
 4400             mn[i] = 0;
 4401         }
 4402         }
 4403     }
 4404 
 4405     /* cumulate non-missing weekly observations? */
 4406     for (i=1; i<dset->v; i++) {
 4407         if (method == COMPACT_SOP) {
 4408         ; /* handled above */
 4409         } else if (method == COMPACT_EOP) {
 4410         mZ[i][t] = dset->Z[i][s];
 4411         } else if (!na(dset->Z[i][s])) {
 4412         if (na(mZ[i][t])) {
 4413             mZ[i][t] = dset->Z[i][s];
 4414         } else {
 4415             mZ[i][t] += dset->Z[i][s];
 4416         }
 4417         mn[i] += 1;
 4418         }
 4419         if (mon == monbak && s == dset->n - 1) {
 4420         /* reached the end: ship out last obs */
 4421         if (method == COMPACT_EOP) {
 4422             mZ[i][t] = NADBL;
 4423         } else if (method == COMPACT_AVG && mn[i] > 0) {
 4424             mZ[i][t] /= (double) mn[i];
 4425         }
 4426         }
 4427     }
 4428     monbak = mon;
 4429     }
 4430 
 4431     free(mn);
 4432 
 4433     return err;
 4434 }
 4435 
 4436 static int
 4437 weeks_to_months_check (const DATASET *dset, int *startyr, int *endyr,
 4438                int *startmon, int *endmon)
 4439 {
 4440     char obsstr[OBSLEN];
 4441     int yr, mon, day;
 4442     int wcount = 0, mcount = 0;
 4443     int monbak = 0;
 4444     int t, err = 0;
 4445 
 4446     for (t=0; t<dset->n; t++) {
 4447     ntodate(obsstr, t, dset);
 4448     if (sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day) != 3) {
 4449         err = 1;
 4450         break;
 4451     }
 4452     if (monbak == 0) {
 4453         /* first obs */
 4454         fprintf(stderr, "starting month = '%d'\n", mon);
 4455         *startyr = yr;
 4456         *startmon = mon;
 4457         mcount++;
 4458         wcount = 1;
 4459     } else if (mon != monbak) {
 4460         /* got a new month: report on previous one */
 4461 #if WEEKLY_DEBUG
 4462         fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
 4463             mcount, monbak, wcount);
 4464 #endif
 4465         mcount++;
 4466         wcount = 1;
 4467     } else {
 4468         /* continuation of current month */
 4469         wcount++;
 4470     }
 4471     monbak = mon;
 4472     }
 4473 
 4474     if (err) {
 4475     mcount = 0;
 4476     } else {
 4477     /* flush the last observation */
 4478 #if WEEKLY_DEBUG
 4479     fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
 4480         mcount, monbak, wcount);
 4481 #endif
 4482     *endyr = yr;
 4483     *endmon = mon;
 4484     }
 4485 
 4486     return mcount;
 4487 }
 4488 
 4489 static int weekly_dataset_to_monthly (DATASET *dset,
 4490                       CompactMethod method)
 4491 {
 4492     DATASET mset;
 4493     int startyr = 1, endyr = 1;
 4494     int startmon = 1, endmon = 1;
 4495     int err = 0;
 4496 
 4497     mset.n = weeks_to_months_check(dset, &startyr, &endyr, &startmon, &endmon);
 4498     fprintf(stderr, "Weekly data: found %d months\n", mset.n);
 4499     if (mset.n <= 0) {
 4500     return E_DATA;
 4501     }
 4502 
 4503     mset.v = dset->v;
 4504     err = allocate_Z(&mset, 0);
 4505     if (err) {
 4506     return err;
 4507     }
 4508 
 4509     /* compact series */
 4510     if (!err && dset->v > 1) {
 4511     err = weeks_to_months_exec(mset.Z, dset, method);
 4512     }
 4513 
 4514     if (err) {
 4515     free_Z(&mset);
 4516     } else {
 4517     free_Z(dset);
 4518     dset->Z = mset.Z;
 4519     dset->n = mset.n;
 4520     dset->pd = 12;
 4521     sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
 4522     sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
 4523     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 4524     dset->t1 = 0;
 4525     dset->t2 = dset->n - 1;
 4526     }
 4527 
 4528     return err;
 4529 }
 4530 
 4531 static int shorten_the_constant (double **Z, int n)
 4532 {
 4533     double *tmp = realloc(Z[0], n * sizeof *tmp);
 4534 
 4535     if (tmp == NULL) {
 4536     return E_ALLOC;
 4537     } else {
 4538     Z[0] = tmp;
 4539     return 0;
 4540     }
 4541 }
 4542 
 4543 /* conversion to weekly using a "representative day", e.g. use
 4544    each Wednesday value: @repday is 0-based on Sunday.
 4545 */
 4546 
 4547 static int daily_dataset_to_weekly (DATASET *dset, int repday)
 4548 {
 4549     int y1, m1, d1;
 4550     char obs[OBSLEN];
 4551     double *x = NULL;
 4552     double *tmp;
 4553     int n = 0, n_ok = 0;
 4554     int wday, ok;
 4555     int i, t, err = 0;
 4556 
 4557     fprintf(stderr, "daily_dataset_to_weekly: repday = %d\n", repday);
 4558 
 4559     for (t=0; t<dset->n; t++) {
 4560     ntodate(obs, t, dset);
 4561     wday = weekday_from_date(obs);
 4562     if (wday == repday) {
 4563         ok = 0;
 4564         for (i=1; i<dset->v; i++) {
 4565         if (!na(dset->Z[i][t])) {
 4566             ok = 1;
 4567             break;
 4568         }
 4569         }
 4570         if (ok) {
 4571         n_ok++;
 4572         }
 4573         if (n == 0) {
 4574         sscanf(obs, YMD_READ_FMT, &y1, &m1, &d1);
 4575         }
 4576         n++;
 4577     }
 4578     }
 4579 
 4580     if (n_ok == 0) {
 4581     gretl_errmsg_set(_("Compacted dataset would be empty"));
 4582     return 1;
 4583     }
 4584 
 4585     fprintf(stderr, "n=%d, n_ok=%d, y1=%d, m1=%d, d1=%d\n",
 4586         n, n_ok, y1, m1, d1);
 4587 
 4588     x = malloc(n * sizeof *x);
 4589     if (x == NULL) {
 4590     return E_ALLOC;
 4591     }
 4592 
 4593     err = shorten_the_constant(dset->Z, n);
 4594 
 4595     for (i=1; i<dset->v && !err; i++) {
 4596     int s = 0;
 4597 
 4598     for (t=0; t<dset->n; t++) {
 4599         ntodate(obs, t, dset);
 4600         wday = weekday_from_date(obs);
 4601         if (wday == repday) {
 4602         x[s++] = dset->Z[i][t];
 4603         }
 4604     }
 4605     tmp = realloc(dset->Z[i], n * sizeof *tmp);
 4606     if (tmp == NULL) {
 4607         err = E_ALLOC;
 4608     } else {
 4609         dset->Z[i] = tmp;
 4610         for (t=0; t<n; t++) {
 4611         dset->Z[i][t] = x[t];
 4612         }
 4613     }
 4614     }
 4615 
 4616     free(x);
 4617 
 4618     if (!err) {
 4619     dset->n = n;
 4620     dset->pd = 52;
 4621 
 4622     sprintf(dset->stobs, YMD_WRITE_Y4_FMT, y1, m1, d1);
 4623     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 4624     dset->t1 = 0;
 4625     dset->t2 = dset->n - 1;
 4626     ntodate(dset->endobs, dset->t2, dset);
 4627 
 4628     dataset_destroy_obs_markers(dset);
 4629     }
 4630 
 4631     return err;
 4632 }
 4633 
 4634 static int daily_dataset_to_monthly (DATASET *dset,
 4635                      CompactMethod default_method)
 4636 {
 4637     int nm, startyr, startmon, endyr, endmon;
 4638     int offset, any_eop;
 4639     CompactMethod method;
 4640     double *x;
 4641     int i, err = 0;
 4642 
 4643     nm = get_n_ok_months(dset, default_method, &startyr, &startmon,
 4644              &endyr, &endmon, &offset, &any_eop);
 4645 
 4646     if (nm <= 0) {
 4647     gretl_errmsg_set(_("Compacted dataset would be empty"));
 4648     return E_DATA;
 4649     }
 4650 
 4651     err = shorten_the_constant(dset->Z, nm);
 4652 
 4653     for (i=1; i<dset->v && !err; i++) {
 4654     method = series_get_compact_method(dset, i);
 4655     if (method == COMPACT_NONE) {
 4656         method = default_method;
 4657     }
 4658 
 4659     x = daily_series_to_monthly(dset, i, nm,
 4660                     startyr, startmon,
 4661                     offset, any_eop, method);
 4662     if (x == NULL) {
 4663         err = E_ALLOC;
 4664     } else {
 4665         free(dset->Z[i]);
 4666         dset->Z[i] = x;
 4667     }
 4668     }
 4669 
 4670     if (!err) {
 4671     dset->n = nm;
 4672     dset->pd = 12;
 4673     sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
 4674     sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
 4675     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 4676     dset->t1 = 0;
 4677     dset->t2 = dset->n - 1;
 4678 
 4679     dataset_destroy_obs_markers(dset);
 4680     }
 4681 
 4682     return err;
 4683 }
 4684 
 4685 static int get_daily_skip (const DATASET *dset, int t)
 4686 {
 4687     int dd = calendar_obs_number(dset->S[t], dset) -
 4688     calendar_obs_number(dset->S[t-1], dset);
 4689 
 4690     if (dd == 0) {
 4691     fprintf(stderr, "get_daily_skip: S[%d] = '%s', S[%d] = '%s'\n",
 4692         t, dset->S[t], t-1, dset->S[t-1]);
 4693     }
 4694 
 4695     return dd - 1;
 4696 }
 4697 
 4698 static int insert_missing_hidden_obs (DATASET *dset, int nmiss)
 4699 {
 4700     int oldn = dset->n;
 4701     double *tmp, **Z;
 4702     int i, s, t, skip;
 4703     int err = 0;
 4704 
 4705     err = dataset_add_observations(dset, nmiss, OPT_NONE);
 4706     if (err) {
 4707     return err;
 4708     }
 4709 
 4710 #if DB_DEBUG
 4711     fprintf(stderr, "daily data: expanded n from %d to %d\n",
 4712         oldn, dset->n);
 4713 #endif
 4714 
 4715     Z = dset->Z;
 4716     tmp = Z[0];
 4717 
 4718     for (i=1; i<dset->v && !err; i++) {
 4719     for (s=0; s<oldn; s++) {
 4720         tmp[s] = Z[i][s];
 4721     }
 4722 
 4723     Z[i][0] = tmp[0];
 4724     t = 1;
 4725     for (s=1; s<oldn; s++) {
 4726         skip = get_daily_skip(dset, s);
 4727         if (skip < 0) {
 4728         err = E_DATA;
 4729         break;
 4730         }
 4731         while (skip--) {
 4732         Z[i][t++] = NADBL;
 4733         }
 4734         Z[i][t++] = tmp[s];
 4735     }
 4736     }
 4737 
 4738     for (t=0; t<dset->n; t++) {
 4739     Z[0][t] = 1.0;
 4740     if (dset->S != NULL) {
 4741         calendar_date_string(dset->S[t], t, dset);
 4742     }
 4743     }
 4744 
 4745     if (!err) {
 4746     dset->t2 = dset->n - 1;
 4747     ntodate(dset->endobs, dset->n - 1, dset);
 4748     }
 4749 
 4750 #if DB_DEBUG > 1
 4751     fprintf(stderr, "insert_missing_hidden_obs, done, err = %d\n", err);
 4752     for (t=0; t<dset->n; t++) {
 4753     fprintf(stderr, "Z[1][%d] = %14g\n", t, Z[1][t]);
 4754     }
 4755 #endif
 4756 
 4757     return err;
 4758 }
 4759 
 4760 int maybe_expand_daily_data (DATASET *dset)
 4761 {
 4762     int nmiss = n_hidden_missing_obs(dset, 0, dset->n - 1);
 4763     int err = 0;
 4764 
 4765     fprintf(stderr, "n_hidden_missing_obs: nmiss = %d\n", nmiss);
 4766 
 4767     if (nmiss < 0) {
 4768     err = 1;
 4769     } else if (nmiss > 0) {
 4770     err = insert_missing_hidden_obs(dset, nmiss);
 4771     }
 4772 
 4773     return err;
 4774 }
 4775 
 4776 static int do_compact_spread (DATASET *dset, int newpd)
 4777 {
 4778     DATASET *cset = NULL;
 4779     int nv = 0;
 4780     int err;
 4781 
 4782     err = compact_spread_pd_check(dset->pd, newpd);
 4783     if (err) {
 4784     return err;
 4785     }
 4786 
 4787     if (dated_daily_data(dset)) {
 4788     err = maybe_expand_daily_data(dset);
 4789     if (err) {
 4790         gretl_errmsg_set("Error expanding daily data with missing observations");
 4791     } else {
 4792         cset = compact_daily_spread(dset, newpd, &nv, &err);
 4793     }
 4794     } else {
 4795     int startmaj, startmin;
 4796     int endmaj, endmin;
 4797 
 4798 
 4799     /* get starting obs major and minor components */
 4800     if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
 4801         return E_DATA;
 4802     }
 4803 
 4804     /* get ending obs major and minor components */
 4805     if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
 4806         return E_DATA;
 4807     }
 4808 
 4809     cset = compact_data_spread(dset, newpd, startmaj, startmin,
 4810                    endmaj, endmin, &nv, &err);
 4811     }
 4812 
 4813     if (!err) {
 4814     free_Z(dset);
 4815     clear_datainfo(dset, CLEAR_FULL);
 4816     *dset = *cset;
 4817     free(cset);
 4818     }
 4819 
 4820     return err;
 4821 }
 4822 
 4823 /**
 4824  * compact_data_set:
 4825  * @dset: dataset struct.
 4826  * @newpd: target data frequency.
 4827  * @default_method: code for the default compaction method.
 4828  * @monstart: if non-zero, take Monday rather than Sunday as
 4829  * the "start of the week" (only relevant for 7-day daily
 4830  * data).
 4831  * @repday: "representative day" for conversion from daily
 4832  * to weekly data (with method %COMPACT_WDAY only).
 4833  *
 4834  * Compact the data set from higher to lower frequency.
 4835  *
 4836  * Returns: 0 on success, non-zero error code on failure.
 4837  */
 4838 
 4839 int compact_data_set (DATASET *dset, int newpd,
 4840               CompactMethod default_method,
 4841               int monstart, int repday)
 4842 {
 4843     int newn, oldn = dset->n, oldpd = dset->pd;
 4844     int compfac;
 4845     int startmaj, startmin;
 4846     int endmaj, endmin;
 4847     int any_eop, all_same;
 4848     int min_startskip = 0;
 4849     char stobs[OBSLEN];
 4850     int i, err = 0;
 4851 
 4852     gretl_error_clear();
 4853 
 4854     if (default_method == COMPACT_SPREAD) {
 4855     return do_compact_spread(dset, newpd);
 4856     }
 4857 
 4858     if (oldpd == 52) {
 4859     return weekly_dataset_to_monthly(dset, default_method);
 4860     }
 4861 
 4862     if (dated_daily_data(dset)) {
 4863     /* allow for the possibility that the daily dataset
 4864        contains "hidden" or suppressed missing observations
 4865        (holidays are just skipped, not marked as NA)
 4866     */
 4867     err = maybe_expand_daily_data(dset);
 4868     if (err) {
 4869         gretl_errmsg_set("Error expanding daily data with missing observations");
 4870         return err;
 4871     } else {
 4872         oldn = dset->n;
 4873     }
 4874     }
 4875 
 4876     if (newpd == 52 && oldpd >= 5 && oldpd <= 7 &&
 4877     default_method == COMPACT_WDAY) {
 4878     /* daily to weekly, using "representative day" */
 4879     return daily_dataset_to_weekly(dset, repday);
 4880     } else if (newpd == 12 && oldpd >= 5 && oldpd <= 7) {
 4881     /* daily to monthly: special */
 4882     return daily_dataset_to_monthly(dset, default_method);
 4883     } else if (oldpd >= 5 && oldpd <= 7) {
 4884     /* daily to weekly */
 4885     compfac = oldpd;
 4886     if (dated_daily_data(dset)) {
 4887         startmin = weekday_from_date(dset->stobs);
 4888         if (oldpd == 7) {
 4889         if (monstart) {
 4890             if (startmin == 0) startmin = 7;
 4891         } else {
 4892             startmin++;
 4893         }
 4894         }
 4895     } else {
 4896         startmin = 1;
 4897     }
 4898     } else if (oldpd == 24 && newpd >= 5 && newpd <= 7) {
 4899     /* hourly to daily */
 4900     compfac = 24;
 4901     if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
 4902         return 1;
 4903     }
 4904     } else {
 4905     compfac = oldpd / newpd;
 4906     /* get starting obs major and minor components */
 4907     if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
 4908         return 1;
 4909     }
 4910     /* get ending obs major and minor components */
 4911     if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
 4912         return 1;
 4913     }
 4914     }
 4915 
 4916     min_startskip = oldpd;
 4917     newn = 0;
 4918     any_eop = 0;
 4919     all_same = 1;
 4920     get_global_compact_params(compfac, startmin, endmin, default_method,
 4921                   &min_startskip, &newn, &any_eop, &all_same,
 4922                   dset);
 4923 
 4924     if (newn == 0 && default_method != COMPACT_SPREAD) {
 4925     gretl_errmsg_set(_("Compacted dataset would be empty"));
 4926     return 1;
 4927     }
 4928 
 4929     if (newpd == 1) {
 4930     if (min_startskip > 0 && !any_eop) {
 4931         startmaj++;
 4932     }
 4933     sprintf(stobs, "%d", startmaj);
 4934     } else if (newpd == 52) {
 4935     if (oldpd >= 5 && oldpd <= 7 && dset->S != NULL) {
 4936         strcpy(stobs, dset->S[min_startskip]);
 4937     } else {
 4938         strcpy(stobs, "1");
 4939     }
 4940     } else {
 4941     int m0 = startmin + min_startskip;
 4942     int minor = m0 / compfac + (m0 % compfac > 0);
 4943 
 4944     if (minor > newpd) {
 4945         startmaj++;
 4946         minor -= newpd;
 4947     }
 4948     format_obs(stobs, startmaj, minor, newpd);
 4949     }
 4950 
 4951     /* revise datainfo members */
 4952     strcpy(dset->stobs, stobs);
 4953     dset->pd = newpd;
 4954     dset->n = newn;
 4955     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 4956     dset->t1 = 0;
 4957     dset->t2 = dset->n - 1;
 4958     ntodate(dset->endobs, dset->t2, dset);
 4959 
 4960     if (oldpd >= 5 && oldpd <= 7 && dset->markers) {
 4961     /* remove any daily date strings; revise endobs */
 4962     dataset_destroy_obs_markers(dset);
 4963     ntodate(dset->endobs, dset->t2, dset);
 4964     }
 4965 
 4966     err = shorten_the_constant(dset->Z, dset->n);
 4967 
 4968     /* compact the individual data series */
 4969     for (i=1; i<dset->v && !err; i++) {
 4970     CompactMethod this_method = default_method;
 4971     int startskip = min_startskip;
 4972     double *x;
 4973 
 4974     if (!all_same) {
 4975         CompactMethod m_i = series_get_compact_method(dset, i);
 4976 
 4977         if (m_i != COMPACT_NONE) {
 4978         this_method = m_i;
 4979         }
 4980 
 4981         startskip = compfac - (startmin % compfac) + 1;
 4982         startskip = startskip % compfac;
 4983         if (this_method == COMPACT_EOP) {
 4984         if (startskip > 0) {
 4985             startskip--;
 4986         } else {
 4987             startskip = compfac - 1;
 4988         }
 4989         }
 4990     }
 4991 
 4992     x = compact_series(dset, i, oldn, startskip, min_startskip,
 4993                compfac, this_method);
 4994     if (x == NULL) {
 4995         err = E_ALLOC;
 4996     } else {
 4997         free(dset->Z[i]);
 4998         dset->Z[i] = x;
 4999     }
 5000     }
 5001 
 5002     return err;
 5003 }
 5004 
 5005 static gretl_matrix *interpol_expand_dataset (const DATASET *dset,
 5006                           int newpd, int *err)
 5007 {
 5008     gretl_matrix *Y0, *Y1 = NULL;
 5009 
 5010     Y0 = gretl_matrix_data_subset(NULL, dset, 0, dset->n - 1,
 5011                   M_MISSING_ERROR, err);
 5012 
 5013     if (!*err) {
 5014     int f = newpd / dset->pd;
 5015 
 5016     Y1 = matrix_chowlin(Y0, NULL, f, err);
 5017     gretl_matrix_free(Y0);
 5018     }
 5019 
 5020     return Y1;
 5021 }
 5022 
 5023 /**
 5024  * expand_data_set:
 5025  * @dset: dataset struct.
 5026  * @newpd: target data frequency.
 5027  * @interpol: use interpolation (0/1).
 5028  *
 5029  * Expand the data set from lower to higher frequency: an "expert"
 5030  * option.  This is supported at present only for expansion from
 5031  * annual to quarterly or monthly, or from quarterly to monthly.
 5032  *
 5033  * Returns: 0 on success, non-zero error code on failure.
 5034  */
 5035 
 5036 int expand_data_set (DATASET *dset, int newpd, int interpol)
 5037 {
 5038     char stobs[OBSLEN];
 5039     int oldn = dset->n;
 5040     int oldpd = dset->pd;
 5041     int t1 = dset->t1;
 5042     int t2 = dset->t2;
 5043     int mult, newn, nadd;
 5044     gretl_matrix *X = NULL;
 5045     double *x = NULL;
 5046     int i, j, t;
 5047     int err = 0;
 5048 
 5049     if (oldpd != 1 && oldpd != 4) {
 5050     return E_PDWRONG;
 5051     } else if (oldpd == 1 && newpd != 4 && newpd != 12) {
 5052     return E_DATA;
 5053     } else if (oldpd == 4 && newpd != 12) {
 5054     return E_DATA;
 5055     } else if (oldpd == 1 && newpd == 12 && interpol) {
 5056     return E_DATA;
 5057     }
 5058 
 5059     if (interpol) {
 5060     X = interpol_expand_dataset(dset, newpd, &err);
 5061     } else {
 5062     x = malloc(oldn * sizeof *x);
 5063     if (x == NULL) {
 5064         err = E_ALLOC;
 5065     }
 5066     }
 5067 
 5068     if (err) {
 5069     return err;
 5070     }
 5071 
 5072     mult = newpd / oldpd;  /* frequency increase factor */
 5073     newn = mult * dset->n; /* revised number of observations */
 5074     nadd = newn - oldn;    /* number of obs to add */
 5075 
 5076     err = dataset_add_observations(dset, nadd, OPT_D);
 5077     if (err) {
 5078     goto bailout;
 5079     }
 5080 
 5081     if (interpol) {
 5082     const double *Xi = X->val;
 5083     size_t sz = newn * sizeof *Xi;
 5084 
 5085     for (i=1; i<dset->v; i++) {
 5086         memcpy(dset->Z[i], Xi, sz);
 5087         Xi += newn;
 5088     }
 5089     } else {
 5090     size_t sz = oldn * sizeof *x;
 5091     int s;
 5092 
 5093     for (i=1; i<dset->v; i++) {
 5094         memcpy(x, dset->Z[i], sz);
 5095         s = 0;
 5096         for (t=0; t<oldn; t++) {
 5097         for (j=0; j<mult; j++) {
 5098             dset->Z[i][s++] = x[t];
 5099         }
 5100         }
 5101     }
 5102     }
 5103 
 5104     if (dset->pd == 1) {
 5105     /* starting with annual data */
 5106     strcpy(stobs, dset->stobs);
 5107     if (newpd == 4) {
 5108         strcat(stobs, ":1");
 5109     } else {
 5110         strcat(stobs, ":01");
 5111     }
 5112     } else {
 5113     /* starting with quarterly data */
 5114     int yr, qtr, mo;
 5115 
 5116     sscanf(dset->stobs, "%d:%d", &yr, &qtr);
 5117     mo = (qtr - 1) * 3 + 1;
 5118     sprintf(stobs, "%d:%02d", yr, mo);
 5119     }
 5120 
 5121     /* revise the sample range, if set */
 5122     if (dset->t1 > 0) {
 5123     dset->t1 *= mult;
 5124     }
 5125     if (dset->t2 < oldn - 1) {
 5126     dset->t2 = dset->t1 + (t2 - t1 + 1) * mult - 1;
 5127     }
 5128 
 5129     strcpy(dset->stobs, stobs);
 5130     dset->pd = newpd;
 5131     dset->sd0 = get_date_x(dset->pd, dset->stobs);
 5132     ntodate(dset->endobs, dset->n - 1, dset);
 5133 
 5134  bailout:
 5135 
 5136     free(x);
 5137     gretl_matrix_free(X);
 5138 
 5139     return err;
 5140 }