"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020e/plugin/excel_import.c" (1 Oct 2019, 42317 Bytes) of package /linux/misc/gretl-2020e.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "excel_import.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 /* Originally based on the Gnumeric excel plugin by Michael Meeks */
   21 
   22 #include "libgretl.h"
   23 #include "version.h"
   24 #include "gretl_string_table.h"
   25 #include "csvdata.h"
   26 
   27 #ifdef WIN32
   28 # include "gretl_win32.h"
   29 #endif
   30 
   31 #include <gtk/gtk.h>
   32 
   33 #include <string.h>
   34 #include <time.h>
   35 #include <errno.h>
   36 
   37 #include "importer.h"
   38 #include "biff.h"
   39 #include "build.h"
   40 
   41 typedef struct xls_info_ xls_info;
   42 
   43 struct sheetrow {
   44     int last, end;
   45     gchar **cells;
   46 };
   47 
   48 struct xls_info_ {
   49     int codepage;
   50     gchar **sst;
   51     int sstsize;
   52     int sstnext;
   53     int datacols;
   54     int totcols;
   55     int nrows;
   56     struct sheetrow *rows;
   57     char *blank_col;
   58     int *codelist;
   59     gretl_string_table *st;
   60 };
   61 
   62 static void free_xls_info (xls_info *xi);
   63 static int allocate_row_col (int row, int col, wbook *book,
   64                  xls_info *xi);
   65 
   66 int debug_print;
   67 
   68 #define cell_record(r) (r == BIFF_LABEL || \
   69                         r == BIFF_STRING || \
   70                         r == BIFF_NUMBER || \
   71                         r == BIFF_RK || \
   72                         r == BIFF_MULRK || \
   73                         r == BIFF_FORMULA || \
   74                         r == BIFF_LABELSST)
   75 
   76 enum {
   77     VARNAMES_OK = 0,
   78     VARNAMES_NULL,
   79     VARNAMES_NOTSTR,
   80     VARNAMES_INVALID,
   81     VARNAMES_NONE
   82 } varname_errors;
   83 
   84 #define EXCEL_IMPORTER
   85 #include "import_common.c"
   86 
   87 const char *adjust_rc = N_("Perhaps you need to adjust the "
   88                "starting column or row?");
   89 
   90 static int dbprintf (const char *format, ...)
   91 {
   92     va_list args;
   93     int len = 0;
   94 
   95     if (debug_print) {
   96     va_start(args, format);
   97     len = vfprintf(stderr, format, args);
   98     va_end(args);
   99     fflush(stderr);
  100     }
  101 
  102     return len;
  103 }
  104 
  105 static void print_version (void)
  106 {
  107     dbprintf("gretl, version %s, %s %s\n", GRETL_VERSION,
  108          _("build date"), BUILD_DATE);
  109 }
  110 
  111 static double get_le_double (const unsigned char *rec)
  112 {
  113     union {
  114         unsigned char cc[8];
  115         double d;
  116     } dconv;
  117 
  118     unsigned char *d;
  119     const unsigned char *s;
  120     int i;
  121 
  122     if (sizeof(double) != 8) {
  123     fputs("Size of double != 8; this won't work!\n", stderr);
  124     return NADBL;
  125     }
  126 
  127 #if G_BYTE_ORDER == G_BIG_ENDIAN
  128     for (s=rec+8, d=dconv.cc, i=0; i<8; i++) *(d++) = *(--s);
  129 #else
  130     for (s=rec, d=dconv.cc, i=0; i<8; i++) *(d++) = *(s++);
  131 #endif
  132 
  133     return dconv.d;
  134 }
  135 
  136 static double biff_get_rk (const unsigned char *ptr)
  137 {
  138     gint32 number;
  139     enum eType {
  140     eIEEE = 0,
  141     eIEEEx100,
  142     eInt,
  143     eIntx100
  144     } type;
  145 
  146     number = MS_OLE_GET_GUINT32(ptr);
  147     type = (number & 0x3);
  148 
  149     switch (type) {
  150     case eIEEE:
  151     case eIEEEx100:
  152         {
  153         guint8 tmp[8];
  154         double answer;
  155         int lp;
  156 
  157         for (lp = 0; lp < 4; lp++) {
  158         tmp[lp + 4] = (lp > 0) ? ptr[lp]: (ptr[lp] & 0xfc);
  159         tmp[lp] = 0;
  160         }
  161         answer = get_le_double(tmp);
  162         return (type == eIEEEx100)? answer / 100 : answer;
  163         }
  164     case eInt:
  165     return (double) (number >> 2);
  166     case eIntx100:
  167     number >>= 2;
  168     if ((number % 100) == 0) {
  169         return (double) (number/100);
  170     } else {
  171         return (double) (number/100.0);
  172     }
  173     }
  174 
  175     return NADBL;
  176 }
  177 
  178 static gchar *convert8to7 (const char *s, int count)
  179 {
  180     gchar *dest;
  181     int n;
  182 
  183     /* we'll skip any leading space */
  184     n = strspn(s, " \t");
  185     count -= n;
  186 
  187     if (count <= 0) {
  188     dest = g_strdup("");
  189     } else {
  190     if (count > VNAMELEN - 1) {
  191         count = VNAMELEN - 1;
  192     }
  193     dest = g_malloc(VNAMELEN);
  194     *dest = '\0';
  195     s += n;
  196     strncat(dest, s, count);
  197     iso_to_ascii(dest);
  198     tailstrip(dest);
  199     }
  200 
  201     dbprintf("convert8to7: returning '%s'\n", dest);
  202 
  203     return dest;
  204 }
  205 
  206 static gchar *convert16to7 (const unsigned char *s, int count)
  207 {
  208     char *dest;
  209     int i, u, j = 0;
  210 
  211     dest = g_malloc(VNAMELEN);
  212     if (dest == NULL) {
  213     return NULL;
  214     }
  215 
  216     memset(dest, 0, VNAMELEN);
  217 
  218     for (i=0; i<count && j<VNAMELEN-1; i++) {
  219     u = MS_OLE_GET_GUINT16(s);
  220     s += 2;
  221     if ((isalnum(u) || ispunct(u)) && u < 128) {
  222         dest[j++] = u;
  223     }
  224     }
  225 
  226     dbprintf("convert16to7: returning '%s'\n", dest);
  227 
  228     return dest;
  229 }
  230 
  231 static gchar *
  232 copy_unicode_string (xls_info *xi, unsigned char *src, int remlen,
  233              int *skip, int *slop)
  234 {
  235     int count = MS_OLE_GET_GUINT16(src);
  236     unsigned char flags = *(src + 2);
  237     int this_skip = 3, skip_to_next = 3;
  238     int csize = (flags & 0x01)? 2 : 1;
  239     gchar *ret = NULL;
  240 
  241     dbprintf("copy_unicode_string: count = %d, csize = %d\n",
  242         count, csize);
  243 
  244     if (flags & 0x08) {
  245     dbprintf(" contains Rich-Text info\n");
  246     }
  247     if (flags & 0x04) {
  248     dbprintf(" contains Far-East info\n");
  249     }
  250 
  251     skip_to_next += count * csize;
  252 
  253     if (flags & 0x08) {
  254     guint16 rich_text_info_len = 0;
  255 
  256     rich_text_info_len = 4 * MS_OLE_GET_GUINT16(src + 3);
  257     this_skip += 2;
  258     skip_to_next += 2 + rich_text_info_len;
  259     }
  260 
  261     if (flags & 0x04) {
  262     guint32 far_east_info_len = 0;
  263     int far_east_offset = 3;
  264 
  265     if (flags & 0x08) {
  266         far_east_offset = 5;
  267     }
  268     far_east_info_len = MS_OLE_GET_GUINT32(src + far_east_offset);
  269     this_skip += 4;
  270     skip_to_next += 4 + far_east_info_len;
  271     }
  272 
  273     /* skip for the next read */
  274     if (skip != NULL) {
  275     *skip = skip_to_next;
  276     }
  277 
  278     /* size check */
  279     if (slop != NULL) {
  280     if (remlen > 0 && this_skip + count > remlen) {
  281         *slop = this_skip + count - remlen;
  282     } else {
  283         *slop = 0;
  284     }
  285     }
  286 
  287     if (count > 64) {
  288     /* let's not mess with excessive strings */
  289     ret = g_strdup("bigstr");
  290     } else if (csize == 1) {
  291     char show[68];
  292 
  293     *show = '\0';
  294     strncat(show, (char *) src + this_skip, count);
  295     dbprintf("original string = '%s'\n", show);
  296     ret = convert8to7((char *) src + this_skip, count);
  297     } else {
  298     if (xi->codepage == 1200) {
  299         const gunichar2 *orig = (const gunichar2 *) (src + this_skip);
  300         GError *gerr = NULL;
  301         glong len = count;
  302         glong got, wrote;
  303 
  304         ret = g_utf16_to_utf8(orig, len, &got, &wrote, &gerr);
  305         dbprintf("utf16_to_utf8: got=%d, wrote=%d\n", (int) got, (int) wrote);
  306         if (gerr != NULL) {
  307         fprintf(stderr, "%s\n", gerr->message);
  308         g_error_free(gerr);
  309         g_free(ret);
  310         ret = NULL;
  311         }
  312     }
  313 
  314     if (ret == NULL) {
  315         /* fallback */
  316         ret = convert16to7(src + this_skip, count);
  317     }
  318     }
  319 
  320     return ret;
  321 }
  322 
  323 static gchar *make_string (gchar *str)
  324 {
  325     gchar *ret = NULL;
  326 
  327     if (str != NULL) {
  328     ret = g_strdup_printf("\"%s", str);
  329     g_free(str);
  330     } else {
  331     ret = g_strdup("\"");
  332     }
  333 
  334     return ret;
  335 }
  336 
  337 static int row_col_err (int row, int col, PRN *prn)
  338 {
  339     static int prevrow = -1, prevcol = -1;
  340     int err = 0;
  341 
  342     if (row < 0 || col < 0) {
  343     fprintf(stderr, "Error: got row=%d, col=%d\n", row, col);
  344     err = 1;
  345     } else if (row == prevrow && col == prevcol) {
  346     pprintf(prn, "Error: found a second cell entry for cell (%d, %d)\n",
  347         prevrow, prevcol);
  348     err = 1;
  349     }
  350 
  351     prevrow = row;
  352     prevcol = col;
  353 
  354     return err;
  355 }
  356 
  357 /* This function is called on LABELSST records: we check for possible
  358    NAs and also for numeric values that may have strayed into the
  359    string table, and that shouldn't really be treated as quoted
  360    strings.  We don't just use strtod at the outset, because some
  361    XLS files (put out by agencies that should know better!) contain
  362    numerical values that are "nicely formatted" as text using spaces
  363    or commas for thousands separators -- we try stripping this
  364    junk out first before doing the numeric-value test.
  365 */
  366 
  367 static int check_copy_string (struct sheetrow *prow, int row, int col,
  368                   int idx, const char *s)
  369 {
  370     dbprintf("inspecting sst[%d] = '%s'\n", idx, s);
  371 
  372     if (row > 0 && col > 0) {
  373     const char *numok = "0123456789 -,.";
  374     int i, len = strlen(s);
  375     int commas = 0, digits = 0;
  376     static int warned = 0;
  377 
  378     if (len == 0) {
  379         dbprintf(" converting to NA\n");
  380         prow->cells[col] = g_strdup("-999");
  381         return 0;
  382     }
  383 
  384     for (i=0; i<len; i++) {
  385         if (strchr(numok, s[i]) == NULL) {
  386         /* does not look promising for numerical value */
  387         len = 0;
  388         break;
  389         }
  390         if (isdigit(s[i])) {
  391         digits++;
  392         } else if (s[i] == ',') {
  393         commas++;
  394         }
  395     }
  396 
  397     if (len > 0 && digits > 0) {
  398         /* may be numerical? */
  399         char *p, *q = g_malloc(len + 1);
  400 
  401         if (q == NULL) return 1;
  402 
  403         p = q;
  404         for (i=0; i<len; i++) {
  405         if (s[i] != ' ' && s[i] != ',') {
  406             *p++ = s[i];
  407         }
  408         if (commas == 1 && s[i] == ',') {
  409             /* single comma could be for 000s, or decimal */
  410             if (!warned) {
  411             fprintf(stderr, "Warning: found ambiguous comma in '%s'\n", s);
  412             warned = 1;
  413             }
  414             if (len - i != 4) {
  415             /* comma is probably decimal separator? */
  416             *p++ = '.';
  417             }
  418         }
  419         }
  420         *p = '\0';
  421 
  422         /* If we don't do a rigorous check on q, as below, we
  423            may end up with zeros where there should be NAs.
  424         */
  425         if (numeric_string(q)) {
  426         dbprintf(" taking '%s' to be numeric string: %s\n", s, q);
  427         prow->cells[col] = q;
  428         return 0;
  429         } else {
  430         g_free(q);
  431         }
  432     }
  433     }
  434 
  435     dbprintf(" copying '%s' into place as string\n", s);
  436     prow->cells[col] = g_strdup_printf("\"%s", s);
  437 
  438     return 0;
  439 }
  440 
  441 static int is_date_format (int fmt)
  442 {
  443     int ret = 0;
  444 
  445     fprintf(stderr, "is_date_format? fmt=%d\n", fmt);
  446 
  447     if (fmt >= 14 && fmt <= 22) {
  448     ret = 1;
  449     } else if (fmt >= 45 && fmt <= 47) {
  450     ret = 1;
  451     } else if (fmt >= 50 && fmt <= 58) {
  452     ret = 1;
  453     } else if (fmt == 164) {
  454     /* FRED uses this */
  455     ret = 1;
  456     }
  457 
  458     return ret;
  459 }
  460 
  461 static int wbook_find_format (wbook *book, int xfref)
  462 {
  463     int fmt = -1;
  464 
  465     if (book->xf_list != NULL && xfref < book->xf_list[0]) {
  466     fmt = book->xf_list[xfref + 1];
  467     }
  468 
  469     return fmt;
  470 }
  471 
  472 static int func_is_date (guint8 *data, int version)
  473 {
  474     /* check for built-in DATE function */
  475     if (version < MS_BIFF_V4) {
  476     return MS_OLE_GET_GUINT8(data) == 65;
  477     } else {
  478     return MS_OLE_GET_GUINT16(data) == 65;
  479     }
  480 }
  481 
  482 #define t_func_size(v) ((v < MS_BIFF_V4)? 2 : 3)
  483 #define t_ref_size(v)  ((v < MS_BIFF_V8)? 4 : 5)
  484 
  485 /* Could be a date formula?  If so, it should have 3 cell reference fields
  486    and a trailing function ID == 65 */
  487 
  488 static void check_for_date_formula (BiffQuery *q, wbook *book)
  489 {
  490     int version = book->version;
  491     int offset = (version < MS_BIFF_V5)? 16 : 20;
  492     guint8 *fdata = q->data + offset;
  493     guint16 sz, targ;
  494     guint8 u1;
  495     int i;
  496 
  497     targ = 3 * t_ref_size(version) + t_func_size(version);
  498 
  499     if (version < MS_BIFF_V3) {
  500     sz = MS_OLE_GET_GUINT8(fdata);
  501     fdata += 1;
  502     } else {
  503     sz = MS_OLE_GET_GUINT16(fdata);
  504     fdata += 2;
  505     }
  506 
  507     /* There's a one-byte ambiguity over the size of the
  508        function ID field in the OpenOffice.org doc for
  509        BIFF, so we'll allow sz to be one byte bigger than
  510        targ.
  511     */
  512     if (sz != targ && sz != targ + 1) {
  513     return;
  514     }
  515 
  516     for (i=0; i<3; i++) {
  517     /* token ID */
  518     u1 = MS_OLE_GET_GUINT8(fdata);
  519     if (u1 != 0x44) { /* 0x44 = tRef */
  520         return;
  521     }
  522     fdata += t_ref_size(version);
  523     }
  524 
  525     u1 = MS_OLE_GET_GUINT8(fdata);
  526 
  527     if (u1 == 0x41 && func_is_date(fdata + 1, version)) { /* 0x41 = tFunc */
  528     fprintf(stderr, "Got DATE formula in first column\n");
  529     book_set_numeric_dates(book);
  530     }
  531 }
  532 
  533 /* Excel's NA() formula stores a "result" of 0.0, so if
  534    we get a formula result of zero we need to check for
  535    NA() and react accordingly. Use of NA() can be found
  536    in XLS files downloaded from FRED.
  537 */
  538 
  539 static int is_na_formula (unsigned char *ptr, wbook *book)
  540 {
  541     int version = book->version;
  542     int offset = (version < MS_BIFF_V5)? 16 : 20;
  543     guint16 sz;
  544 
  545     ptr += offset;
  546     sz = MS_OLE_GET_GUINT16(ptr);
  547 
  548     /* 0x42: indicates a built-in function
  549        10 is the index of the BIFF NA() function
  550     */
  551     if (sz == 4 && ptr[2] == 0x42 && ptr[4] == 10) {
  552     return 1;
  553     } else {
  554     return 0;
  555     }
  556 }
  557 
  558 #undef FORMAT_INFO
  559 
  560 static int process_item (BiffQuery *q, wbook *book, xls_info *xi,
  561              PRN *prn)
  562 {
  563     struct sheetrow *prow = NULL;
  564     static char **string_targ;
  565     static int slop; /* SST overslop */
  566     unsigned char *ptr = NULL;
  567     int i = 0, j = 0;
  568     double val;
  569 
  570     if (cell_record(q->ls_op)) {
  571     i = EX_GETROW(q);
  572     j = EX_GETCOL(q);
  573     if (row_col_err(i, j, prn)) {
  574         return 1;
  575     }
  576     if (q->ls_op == BIFF_NUMBER || q->ls_op == BIFF_RK || q->ls_op == BIFF_MULRK) {
  577         guint16 xfref = EX_GETXF(q);
  578         int fmt = wbook_find_format(book, xfref);
  579 
  580 #if 0
  581         fprintf(stderr, "Numeric cell (%d, %d), XF index = %d, fmt = %d\n",
  582             i, j, (int) xfref, fmt);
  583 #endif
  584         if (i == book->row_offset + 1 &&
  585         j == book->col_offset &&
  586         is_date_format(fmt)) {
  587         fprintf(stderr, "Testing first obs cell (%d, %d): date format %d\n",
  588             i, j, fmt);
  589         book_set_numeric_dates(book);
  590         }
  591     }
  592     }
  593 
  594     switch (q->ls_op) {
  595 
  596     case BIFF_SST: {
  597     int k, skip, remlen, oldsz = xi->sstsize;
  598     guint16 sz;
  599 
  600     if (xi->sst != NULL) {
  601         fprintf(stderr, "Got a second string table: this is nonsense\n");
  602         return 1;
  603     }
  604 
  605     sz = MS_OLE_GET_GUINT16(q->data + 4);
  606     xi->sstsize += sz;
  607     xi->sst = realloc(xi->sst, xi->sstsize * sizeof *xi->sst);
  608     if (xi->sst == NULL) {
  609         return 1;
  610     }
  611 
  612     dbprintf("Got SST: allocated for %d strings (%d bytes), %p\n",
  613          xi->sstsize, xi->sstsize * sizeof *xi->sst, (void *) xi->sst);
  614 
  615     for (k=oldsz; k<xi->sstsize; k++) {
  616         /* careful: initialize all pointers to NULL */
  617         xi->sst[k] = NULL;
  618     }
  619 
  620     ptr = q->data + 8;
  621 
  622     for (k=oldsz; k<xi->sstsize; k++) {
  623         remlen = q->length - (ptr - q->data);
  624         dbprintf("Working on sst[%d], data offset=%d, remlen=%d\n",
  625              k, (int) (ptr - q->data), remlen);
  626         if (remlen <= 0) {
  627         break;
  628         }
  629         xi->sst[k] = copy_unicode_string(xi, ptr, remlen, &skip, &slop);
  630         ptr += skip;
  631     }
  632 
  633     if (k < xi->sstsize) {
  634         xi->sstnext = k;
  635     }
  636 
  637     break;
  638     }
  639 
  640     case BIFF_CONTINUE:
  641     dbprintf("Got CONTINUE, xi->sstnext = %d, len = %d\n",
  642          xi->sstnext, (int) q->length);
  643     if (xi->sstnext > 0) {
  644         int k, skip, remlen;
  645 
  646         ptr = q->data;
  647         if (slop > 0) {
  648         unsigned char flags = *ptr;
  649         int csize = (flags & 0x01)? 2 : 1;
  650 
  651         dbprintf("BIFF_CONTINUE: slop = %d, csize = %d\n", (int) slop,
  652              (int) csize);
  653         ptr += 1 + csize * slop;
  654         }
  655         for (k=xi->sstnext; k<xi->sstsize; k++) {
  656         remlen = q->length - (ptr - q->data);
  657         if (remlen <= 0) {
  658             break;
  659         }
  660         dbprintf("Working on sst[%d], remlen = %d\n", k, remlen);
  661         if (xi->sst[k] != NULL) {
  662             g_free(xi->sst[k]);
  663         }
  664         xi->sst[k] = copy_unicode_string(xi, ptr, remlen, &skip, &slop);
  665         ptr += skip;
  666         }
  667         if (k < xi->sstsize) {
  668         xi->sstnext = k;
  669         }
  670     }
  671     break;
  672 
  673     case BIFF_LABEL:
  674     dbprintf("Got LABEL, row=%d, col=%d\n", i, j);
  675     if (allocate_row_col(i, j, book, xi)) {
  676         return E_ALLOC;
  677     } else {
  678         unsigned int len = MS_OLE_GET_GUINT16(q->data + 6);
  679 
  680         prow = xi->rows + i;
  681         ptr = q->data + 8;
  682         fprintf(stderr, "BIFF_LABEL: calling convert8to7\n");
  683         prow->cells[j] = make_string(convert8to7((char *) ptr, len));
  684     }
  685     break;
  686 
  687     case BIFF_LABELSST:
  688     dbprintf("Got LABELSST, row=%d, col=%d\n", i, j);
  689     if (allocate_row_col(i, j, book, xi)) {
  690         return E_ALLOC;
  691     } else {
  692         unsigned int sidx = MS_OLE_GET_GUINT16(q->data + 6);
  693 
  694         prow = xi->rows + i;
  695         if (sidx >= xi->sstsize) {
  696         pprintf(prn, _("String index too large"));
  697         pputc(prn, '\n');
  698         } else if (xi->sst[sidx] != NULL) {
  699         check_copy_string(prow, i, j, sidx, xi->sst[sidx]);
  700         } else {
  701         dbprintf("sst[%d] seems to be NULL, leaving string blank\n", (int) sidx);
  702         prow->cells[j] = g_malloc(2);
  703         if (prow->cells[j] != NULL) {
  704             prow->cells[j][0] = '\0';
  705         }
  706         }
  707     }
  708     break;
  709 
  710     case BIFF_NUMBER:
  711     if (allocate_row_col(i, j, book, xi)) {
  712         return E_ALLOC;
  713     } else {
  714         val = get_le_double(q->data + 6);
  715         prow = xi->rows + i;
  716         prow->cells[j] = g_strdup_printf("%.15g", val);
  717         dbprintf("Got NUMBER (%g), row=%d, col=%d\n", val, i, j);
  718     }
  719     break;
  720 
  721     case BIFF_RK:
  722     if (allocate_row_col(i, j, book, xi)) {
  723         return E_ALLOC;
  724     } else {
  725         val = biff_get_rk(q->data + 6);
  726         prow = xi->rows + i;
  727         prow->cells[j] = g_strdup_printf("%.15g", val);
  728         dbprintf("Got RK (%g), row=%d, col=%d\n", val, i, j);
  729     }
  730     break;
  731 
  732     case BIFF_MULRK: {
  733     int k, ncols = (q->length - 6) / 6;
  734 
  735     dbprintf("Got MULRK, row=%d, first_col=%d, ncols=%d\n", i, j, ncols);
  736     for (k=0; k<ncols; k++) {
  737         if (allocate_row_col(i, j, book, xi)) {
  738         return E_ALLOC;
  739         }
  740         val = biff_get_rk(q->data + 6 + 6 * k);
  741         prow = xi->rows + i; /* might have moved */
  742         prow->cells[j] = g_strdup_printf("%.15g", val);
  743         dbprintf(" MULRK[col=%d] = %g\n", j, val);
  744         j++;
  745     }
  746     break;
  747     }
  748 
  749     case BIFF_FORMULA:
  750     dbprintf("Got FORMULA, row=%d, col=%d\n", i, j);
  751     if (allocate_row_col(i, j, book, xi)) {
  752         return E_ALLOC;
  753     } else {
  754         /* the result of the formula should be at offset 6 */
  755         ptr = q->data + 6;
  756         prow = xi->rows + i;
  757         if (ptr[6] == 0xff && ptr[7] == 0xff) {
  758         /* string, boolean or error result */
  759         unsigned char fcode = ptr[0];
  760 
  761         if (fcode == 0x0) {
  762             /* string formula: record the target for following
  763                STRING record */
  764             string_targ = prow->cells + j;
  765         } else if (fcode == 0x1) {
  766             /* boolean value */
  767             prow->cells[j] = g_strdup((ptr[2])? "1" : "0");
  768         } else if (fcode == 0x2 || fcode == 0x3) {
  769             /* error code or empty */
  770             prow->cells[j] = g_strdup("-999");
  771         } else {
  772             fprintf(stderr, "Bad formula code 0x%u\n",
  773                 (unsigned) fcode);
  774             prow->cells[j] = g_strdup("-999");
  775         }
  776         } else {
  777         /* should have a floating-point result */
  778         val = get_le_double(ptr);
  779         if (val == 0.0 && is_na_formula(q->data, book)) {
  780             dbprintf("floating-point value = na()\n");
  781             prow->cells[j] = g_strdup("-999");
  782         } else if (isnan(val)) {
  783             dbprintf("floating-point value is NaN\n");
  784             prow->cells[j] = g_strdup("-999");
  785         } else {
  786             dbprintf(" floating-point value = %g\n", val);
  787             prow->cells[j] = g_strdup_printf("%.15g", val);
  788             if (i == book->row_offset + 1 && j == book->col_offset) {
  789             /* could be a date formula? */
  790             check_for_date_formula(q, book);
  791             }
  792         }
  793         }
  794     }
  795     break;
  796 
  797     case BIFF_STRING:
  798     if (string_targ == NULL) {
  799         dbprintf("String record without preceding string formula\n");
  800     } else {
  801         gchar *tmp = copy_unicode_string(xi, q->data, 0, NULL, NULL);
  802 
  803         *string_targ = make_string(tmp);
  804         dbprintf("Filled out string formula with '%s'\n", *string_targ);
  805         string_targ = NULL; /* handled */
  806     }
  807     break;
  808 
  809     case BIFF_BOF:
  810     if (xi->rows != NULL) {
  811         fprintf(stderr, "BOF when current sheet is not flushed\n");
  812         return 1;
  813     }
  814     if (1) {
  815         unsigned version, boftype;
  816 
  817         version = MS_OLE_GET_GUINT16(q->data + 0);
  818         boftype = MS_OLE_GET_GUINT16(q->data + 2);
  819         dbprintf("Got BOF: version=%x, type=%x\n", version, boftype);
  820     }
  821     break;
  822 
  823     case BIFF_FORMAT: {
  824     int idx = MS_OLE_GET_GUINT16(q->data + 0);
  825 
  826     if (idx >= 14 && idx <= 17) {
  827         fprintf(stderr, "Got date format: index %d\n", idx);
  828     }
  829     break;
  830     }
  831 
  832 #ifdef FORMAT_INFO
  833     case BIFF_COLINFO:
  834     fprintf(stderr, "Got BIFF_COLINFO: col range (%d, %d), XF index %d\n",
  835         (int) MS_OLE_GET_GUINT16(q->data + 0),
  836         (int) MS_OLE_GET_GUINT16(q->data + 2),
  837         (int) MS_OLE_GET_GUINT16(q->data + 6));
  838     break;
  839 
  840     case BIFF_XF: {
  841     unsigned short tp = MS_OLE_GET_GUINT16(q->data + 4);
  842 
  843     fprintf(stderr, "Got BIFF_XF: format record index %d ",
  844         (int) MS_OLE_GET_GUINT16(q->data + 2));
  845     if (tp & 0x04) {
  846         fprintf(stderr, "(style XF)\n");
  847     } else {
  848         fprintf(stderr, "(cell XF)\n");
  849     }
  850     break;
  851     }
  852 #endif
  853 
  854     default:
  855     break;
  856     }
  857 
  858     return 0;
  859 }
  860 
  861 static int handled_record (BiffQuery *q)
  862 {
  863     if (q->opcode == BIFF_SST ||
  864     q->opcode == BIFF_CONTINUE ||
  865     q->opcode == BIFF_LABELSST ||
  866     q->opcode == BIFF_MULRK ||
  867     q->opcode == BIFF_FORMULA) {
  868     return 1;
  869     }
  870 
  871     if (q->ms_op == 0x02) {
  872     if (q->ls_op == BIFF_LABEL ||
  873         q->ls_op == BIFF_NUMBER ||
  874         q->ls_op == BIFF_RK ||
  875         q->ls_op == BIFF_STRING) {
  876         return 1;
  877     }
  878     }
  879 
  880 #ifdef FORMAT_INFO
  881     if (q->opcode == BIFF_COLINFO ||
  882     q->opcode == BIFF_XF) {
  883     return 1;
  884     }
  885     if (q->ms_op == 0x04) {
  886     if (q->ls_op == BIFF_FORMAT) {
  887         return 1;
  888     }
  889     }
  890 #endif
  891 
  892     if (q->ms_op == 0x08) {
  893     if (q->ls_op == BIFF_BOF) return 1;
  894     }
  895 
  896     return 0;
  897 }
  898 
  899 static int process_sheet (const char *filename, wbook *book, xls_info *xi,
  900               PRN *prn)
  901 {
  902     int err = 0, gotbof = 0, eofcount = 0;
  903     static int skipped;
  904     long offset = book->byte_offsets[book->selected];
  905     MsOleStream *stream;
  906     MsOleErr result;
  907     BiffQuery *q;
  908     MsOle *file;
  909 
  910     if (ms_ole_open(&file, filename)) {
  911     return 1;
  912     }
  913 
  914     result = ms_ole_stream_open_workbook(&stream, file);
  915 
  916     if (result != MS_OLE_ERR_OK) {
  917     ms_ole_destroy(&file);
  918     return 1;
  919     }
  920 
  921     fputs("Reading file...\n", stderr);
  922     q = ms_biff_query_new(stream);
  923 
  924     while (!gotbof && ms_biff_query_next(q)) {
  925     if (q->ls_op == BIFF_BOF) {
  926         gotbof = 1;
  927         break;
  928     }
  929     }
  930 
  931     if (!gotbof) {
  932     pprintf(prn, _("%s: No BOF record found"), filename);
  933     return 1;
  934     }
  935 
  936     while (!err && ms_biff_query_next(q)) {
  937     dbprintf("At %lu: q->opcode=0x%02x\n", (unsigned long) q->streamPos, q->opcode);
  938     if (q->opcode == BIFF_EOF) {
  939         dbprintf("got MSEOF at %lu\n", (unsigned long) ms_ole_stream_position(stream));
  940         eofcount++;
  941 
  942         if (eofcount == 1) {
  943         if (ms_ole_stream_position(stream) < offset) {
  944             /* skip to the worksheet we want? */
  945             while (q->streamPos < offset && ms_biff_query_next(q)) ;
  946             fprintf(stderr, "skipped forward to %lu\n",
  947                 (unsigned long) q->streamPos);
  948         } else {
  949             fprintf(stderr, "reading worksheet at %lu\n",
  950                 (unsigned long) ms_ole_stream_position(stream));
  951         }
  952         }
  953 
  954         if (eofcount == 2) {
  955         break;
  956         } else {
  957         continue;
  958         }
  959     }
  960 
  961     if (handled_record(q)) {
  962         err = process_item(q, book, xi, prn);
  963     } else if (q->ms_op == 0x02 && q->ls_op == BIFF_ROW) {
  964         dbprintf("Got BIFF_ROW\n");
  965     } else if (q->opcode == BIFF_DBCELL) {
  966         dbprintf("Got BIFF_DBCELL\n");
  967     } else if (q->opcode == 0x42) {
  968         if (q->length == 2 && q->data != NULL) {
  969         int cp = MS_OLE_GET_GUINT16(q->data);
  970 
  971         fprintf(stderr, "CODEPAGE: got %d\n", cp);
  972         xi->codepage = cp;
  973         }
  974     } else {
  975         if (q->opcode != skipped) {
  976         dbprintf("skipping unhandled opcode 0x%02x\n", q->opcode);
  977         }
  978         skipped = q->opcode;
  979     }
  980     }
  981 
  982     ms_biff_query_destroy(q);
  983     ms_ole_stream_close(&stream);
  984     ms_ole_destroy(&file);
  985 
  986     return err;
  987 }
  988 
  989 static void row_init (struct sheetrow *row)
  990 {
  991     row->last = 0;
  992     row->end = 0;
  993     row->cells = NULL;
  994 }
  995 
  996 static int allocate_row_col (int i, int j, wbook *book,
  997                  xls_info *xi)
  998 {
  999     static int started;
 1000     int k;
 1001 
 1002     if (!started && i > book->row_offset) {
 1003     book->row_offset = i;
 1004     fprintf(stderr, "Missing rows: trying an offset of %d\n", i);
 1005     }
 1006 
 1007     started = 1;
 1008 
 1009     dbprintf("allocate: row=%d, col=%d, nrows=%d\n", i, j, xi->nrows);
 1010 
 1011     if (i >= xi->nrows) {
 1012     int new_nrows = (i / 16 + 1) * 16;
 1013     struct sheetrow *myrows;
 1014 
 1015     myrows = realloc(xi->rows, new_nrows * sizeof *myrows);
 1016     if (myrows == NULL) {
 1017         return 1;
 1018     }
 1019 
 1020     xi->rows = myrows;
 1021 
 1022     for (k=xi->nrows; k<new_nrows; k++) {
 1023         dbprintf("allocate: initing rows[%d]\n", k);
 1024         row_init(&xi->rows[k]);
 1025         dbprintf("rows[%d].end=%d\n", i, xi->rows[k].end);
 1026     }
 1027     xi->nrows = new_nrows;
 1028     }
 1029 
 1030     dbprintf("allocate: col=%d and rows[%d].end = %d\n", j, i, xi->rows[i].end);
 1031 
 1032     if (j >= xi->rows[i].end) {
 1033     int newcol = (j / 16 + 1) * 16;
 1034     gchar **cells;
 1035 
 1036     dbprintf("allocate: reallocing rows[%d].cells to size %d\n", i, newcol);
 1037     cells = realloc(xi->rows[i].cells, newcol * sizeof *cells);
 1038 
 1039     if (cells == NULL) {
 1040         return 1;
 1041     }
 1042 
 1043     xi->rows[i].cells = cells;
 1044 
 1045     for (k=xi->rows[i].end; k<newcol; k++) {
 1046         xi->rows[i].cells[k] = NULL;
 1047     }
 1048     xi->rows[i].end = newcol;
 1049     }
 1050 
 1051     if (j > xi->rows[i].last) {
 1052     xi->rows[i].last = j;
 1053     }
 1054 
 1055     return 0;
 1056 }
 1057 
 1058 static void xls_info_init (xls_info *xi)
 1059 {
 1060     xi->codepage = 0;
 1061     xi->sst = NULL;
 1062     xi->sstsize = 0;
 1063     xi->datacols = 0;
 1064     xi->totcols = 0;
 1065     xi->nrows = 0;
 1066     xi->rows = NULL;
 1067     xi->blank_col = NULL;
 1068     xi->codelist = NULL;
 1069     xi->st = NULL;
 1070 }
 1071 
 1072 static void free_xls_info (xls_info *xi)
 1073 {
 1074     int i, j;
 1075 
 1076     dbprintf("free_xls_info(), nrows=%d\n", xi->nrows);
 1077 
 1078     /* free shared string table */
 1079     if (xi->sst != NULL) {
 1080     for (i=0; i<xi->sstsize; i++) {
 1081         g_free(xi->sst[i]);
 1082     }
 1083     free(xi->sst);
 1084     xi->sst = NULL;
 1085     }
 1086 
 1087     /* free cells */
 1088     if (xi->rows != NULL) {
 1089     for (i=0; i<xi->nrows; i++) {
 1090         if (xi->rows[i].cells == NULL) {
 1091         dbprintf("rows[%d].cells = NULL, skipping free\n", i);
 1092         continue;
 1093         }
 1094         for (j=0; j<xi->rows[i].end; j++) {
 1095         if (xi->rows[i].cells[j] != NULL) {
 1096             dbprintf("Freeing rows[%d].cells[%d] at %p\n",
 1097                  i, j, (void *) xi->rows[i].cells[j]);
 1098             g_free(xi->rows[i].cells[j]);
 1099         }
 1100         }
 1101         dbprintf("Freeing rows[%d].cells at %p\n", i, (void *) xi->rows[i].cells);
 1102         free(xi->rows[i].cells);
 1103     }
 1104     free(xi->rows);
 1105     }
 1106 
 1107     free(xi->blank_col);
 1108     free(xi->codelist);
 1109     if (xi->st != NULL) {
 1110     gretl_string_table_destroy(xi->st);
 1111     }
 1112 }
 1113 
 1114 #define IS_STRING(v) ((v[0] == '"'))
 1115 
 1116 /* check for full set of strings in first column to be read (which may
 1117    be at an offset into the worksheet)
 1118  */
 1119 
 1120 static int first_col_strings (wbook *book, xls_info *xi)
 1121 {
 1122     int i, j = book->col_offset;
 1123     int startrow = book->row_offset + 1;
 1124     int ret = 1;
 1125 
 1126     dbprintf("checking for first column strings...\n");
 1127 
 1128     for (i=startrow; i<xi->nrows; i++) {
 1129     dbprintf("book->row_offset=%d, i=%d\n", book->row_offset, i);
 1130     dbprintf("rows = %p\n", (void *) xi->rows);
 1131     if (xi->rows == NULL || xi->rows[i].cells == NULL ||
 1132         xi->rows[i].cells[j] == NULL ||
 1133         !IS_STRING(xi->rows[i].cells[j])) {
 1134         dbprintf("no: not a string at row %d\n", i);
 1135         ret = 0;
 1136         break;
 1137     }
 1138     dbprintf("first_col_strings: rows[%d].cells[%d]: '%s'\n", i, j,
 1139          xi->rows[i].cells[j]);
 1140     }
 1141 
 1142     if (ret) {
 1143     book_set_obs_labels(book);
 1144     }
 1145 
 1146     return ret;
 1147 }
 1148 
 1149 #define obs_string(s) (!strcmp(s, "obs") || !strcmp(s, "id"))
 1150 
 1151 static int check_all_varnames (wbook *book, xls_info *xi, PRN *prn)
 1152 {
 1153     int j, i = book->row_offset;
 1154     int startcol = book->col_offset;
 1155     int realcols = 0;
 1156     int gotcols = 0;
 1157     int vnames = 0;
 1158     int ret = VARNAMES_NONE;
 1159 
 1160     if (book_obs_labels(book) || book_numeric_dates(book)) {
 1161     startcol++;
 1162     gotcols = 1;
 1163     }
 1164 
 1165     if (xi->rows[i].cells == NULL) {
 1166     fprintf(stderr, "Row %d is empty, trying lower...\n", i);
 1167     while (i < xi->nrows - 1 && xi->rows[i].cells == NULL) {
 1168         book->row_offset += 1;
 1169         i++;
 1170     }
 1171     }
 1172 
 1173     for (j=startcol; j<xi->totcols; j++) {
 1174     if (xi->blank_col[j]) {
 1175         gotcols++;
 1176         continue;
 1177     }
 1178 
 1179     if (xi->rows[i].cells[j] == NULL) {
 1180         dbprintf("got_varnames: rows[%d].cells[%d] is NULL\n", i, j);
 1181         break;
 1182     }
 1183 
 1184     gotcols++;
 1185 
 1186     dbprintf("got_varnames: rows[%d].cells[%d] is '%s'\n", i, j,
 1187          xi->rows[i].cells[j]);
 1188 
 1189     if (IS_STRING(xi->rows[i].cells[j])) {
 1190         /* skip beyond the quote */
 1191         char *test = xi->rows[i].cells[j] + 1;
 1192 
 1193         /* "obs" or "id" is OK in the first col of the selection,
 1194            but not thereafter */
 1195         if (j == startcol && obs_string(test)) {
 1196         /* pass along */
 1197         ;
 1198         } else {
 1199         int verr = check_imported_varname(test, 0, i, j, prn);
 1200 
 1201         if (verr) {
 1202             return verr;
 1203         }
 1204         }
 1205         vnames++;
 1206     }
 1207     realcols++;
 1208     }
 1209 
 1210     if (vnames == realcols) {
 1211     ret = VARNAMES_OK;
 1212     } else if (vnames > 0) {
 1213     ret = VARNAMES_NOTSTR;
 1214     }
 1215 
 1216     return ret;
 1217 }
 1218 
 1219 static int missval_string (const char *s)
 1220 {
 1221     s++;
 1222 
 1223     return (*s == '\0' || import_na_string(s));
 1224 }
 1225 
 1226 struct string_err {
 1227     int row;
 1228     int column;
 1229     char *str;
 1230 };
 1231 
 1232 static void clear_string_err (struct string_err *strerr)
 1233 {
 1234     strerr->row = 0;
 1235     strerr->column = 0;
 1236     free(strerr->str);
 1237     strerr->str = NULL;
 1238 }
 1239 
 1240 #define xls_cell(x,i,j) (x->rows[i].cells[j])
 1241 
 1242 /* check for invalid data in the selected data block */
 1243 
 1244 static int
 1245 check_data_block (wbook *book, xls_info *xi, int *missvals,
 1246           struct string_err *strerr)
 1247 {
 1248     int *codelist = NULL;
 1249     int startcol = book->col_offset;
 1250     int startrow = book->row_offset + 1;
 1251     int j, i, err = 0;
 1252 
 1253     if (book_obs_labels(book) || book_numeric_dates(book)) {
 1254     startcol++;
 1255     }
 1256 
 1257     strerr->row = 0;
 1258     strerr->column = 0;
 1259     strerr->str = NULL;
 1260 
 1261     for (j=startcol; j<xi->totcols && !err; j++) {
 1262     int strvals = 0;
 1263 
 1264     dbprintf("data_block: col=%d\n", j);
 1265     if (xi->blank_col[j]) {
 1266         continue;
 1267     }
 1268     for (i=startrow; i<xi->nrows; i++) {
 1269         dbprintf(" rows[%d], end = %d\n", i, xi->rows[i].end);
 1270         if (xi->rows[i].cells  == NULL) {
 1271         dbprintf("  rows[%d].cells = NULL\n", i);
 1272         *missvals = 1;
 1273         } else if (j >= xi->rows[i].end) {
 1274         dbprintf("  short row, fell off the end\n");
 1275         *missvals = 1;
 1276         } else if (xls_cell(xi, i, j) == NULL) {
 1277         dbprintf("  rows[%d].cells[%d] = NULL\n", i, j);
 1278         xi->rows[i].cells[j] = g_strdup("-999");
 1279         *missvals = 1;
 1280         } else if (IS_STRING(xls_cell(xi, i, j))) {
 1281         if (missval_string(xls_cell(xi, i, j))) {
 1282             dbprintf("  rows[%d].cells[%d] = missval\n", i, j);
 1283             g_free(xi->rows[i].cells[j]);
 1284             xi->rows[i].cells[j] = g_strdup("-999");
 1285             *missvals = 1;
 1286         } else {
 1287             dbprintf("  rows[%d].cells[%d]: %s (string)\n",
 1288                  i, j, xls_cell(xi, i, j));
 1289             strvals++;
 1290             if (strerr->row == 0) {
 1291             strerr->row = i + 1;
 1292             strerr->column = j + 1;
 1293             strerr->str = g_strdup(xls_cell(xi, i, j));
 1294             }
 1295         }
 1296         } else {
 1297         dbprintf("  rows[%d].cells[%d]: %s (numeric?)\n",
 1298              i, j, xls_cell(xi, i, j));
 1299         }
 1300     }
 1301     if (strvals > 0) {
 1302         dbprintf(" col %d: %d string values\n", j, strvals);
 1303         if (strvals == xi->nrows - startrow) {
 1304         int k = j - startcol + 1;
 1305 
 1306         fprintf(stderr, "col %d: all strings -> accept\n", j);
 1307         codelist = gretl_list_append_term(&codelist, k);
 1308         clear_string_err(strerr);
 1309         } else {
 1310         err = E_DATA;
 1311         }
 1312     }
 1313     }
 1314 
 1315     if (codelist != NULL) {
 1316     printlist(codelist, "codelist");
 1317     if (err) {
 1318         free(codelist);
 1319     } else {
 1320         xi->codelist = codelist;
 1321     }
 1322     }
 1323 
 1324     return err;
 1325 }
 1326 
 1327 /* determine the number of actual data columns, starting from a
 1328    given offset into the worksheet, and allowing for the
 1329    possibility that the first selected column contains string
 1330    labels
 1331 */
 1332 
 1333 static int
 1334 n_vars_from_col (wbook *book, int totcols, char *blank_col)
 1335 {
 1336     int offset = book->col_offset;
 1337     int i, nv = 1;
 1338 
 1339     if (book_time_series(book) || book_obs_labels(book)) {
 1340     /* got a first non-data column */
 1341     offset++;
 1342     }
 1343 
 1344     for (i=offset; i<totcols; i++) {
 1345     if (!blank_col[i]) nv++;
 1346     }
 1347 
 1348     dbprintf("n_vars_from_col: totcols=%d, nv=%d\n", totcols, nv);
 1349 
 1350     return nv;
 1351 }
 1352 
 1353 static int
 1354 transcribe_data (wbook *book, xls_info *xi, DATASET *dset,
 1355          PRN *prn)
 1356 {
 1357     int startcol = book->col_offset;
 1358     int roff = book->row_offset;
 1359     int i, t, j = 1;
 1360     int err = 0;
 1361 
 1362     if (book_obs_labels(book) || book_time_series(book)) {
 1363     startcol++;
 1364     }
 1365 
 1366     if (xi->codelist != NULL) {
 1367     xi->st = gretl_string_table_new(xi->codelist);
 1368     if (xi->st == NULL) {
 1369         return E_ALLOC;
 1370     }
 1371     }
 1372 
 1373     for (i=startcol; i<xi->totcols && !err; i++) {
 1374     const char *val = NULL;
 1375     int ts, strvals = 0;
 1376 
 1377     if (xi->blank_col[i]) {
 1378         continue;
 1379     }
 1380 
 1381     if (j >= dset->v) {
 1382         break;
 1383     }
 1384 
 1385     dset->varname[j][0] = 0;
 1386     if (book_auto_varnames(book)) {
 1387         sprintf(dset->varname[j], "v%d", j);
 1388     } else if (xi->rows[roff].cells[i] == NULL) {
 1389         sprintf(dset->varname[j], "v%d", j);
 1390     } else if (i >= xi->rows[roff].end) {
 1391         sprintf(dset->varname[j], "v%d", j);
 1392     } else {
 1393         strncat(dset->varname[j], xi->rows[roff].cells[i] + 1,
 1394             VNAMELEN - 1);
 1395         dbprintf("accessing rows[%d].cells[%d] at %p\n",
 1396              roff, i, (void *) xi->rows[roff].cells[i]);
 1397     }
 1398 
 1399     /* remedial: replace space with underscore */
 1400     gretl_charsub(dset->varname[j], ' ', '_');
 1401 
 1402     err = check_varname(dset->varname[j]);
 1403     if (err) {
 1404         pprintf(prn, "%s\n", gretl_errmsg_get());
 1405         break;
 1406     }
 1407 
 1408     dbprintf("set varname[%d] = '%s'\n", j, dset->varname[j]);
 1409 
 1410     if (in_gretl_list(xi->codelist, j)) {
 1411         strvals = 1;
 1412     }
 1413 
 1414     for (t=0; t<dset->n && !err; t++) {
 1415         ts = t + 1 + roff;
 1416         if (xi->rows[ts].cells == NULL || i >= xi->rows[ts].end ||
 1417         xi->rows[ts].cells[i] == NULL) {
 1418         continue;
 1419         }
 1420 
 1421         val = xi->rows[ts].cells[i];
 1422         if (val != NULL && *val == '"') {
 1423         val++;
 1424         }
 1425 
 1426         dbprintf("accessing rows[%d].cells[%d] at %p\n", ts, i,
 1427              (void *) val);
 1428         dbprintf("setting Z[%d][%d] = rows[%d].cells[%d] "
 1429              "= '%s'\n", j, t, i, ts, val);
 1430 
 1431         if (strvals) {
 1432         int xjt = gretl_string_table_index(xi->st, val, j, 0, prn);
 1433 
 1434         if (xjt > 0) {
 1435             dset->Z[j][t] = xjt;
 1436         } else {
 1437             err = E_DATA;
 1438         }
 1439         } else {
 1440         dset->Z[j][t] = atof(val);
 1441         if (dset->Z[j][t] == -999 || dset->Z[j][t] == -9999) {
 1442             dset->Z[j][t] = NADBL;
 1443         }
 1444         }
 1445     }
 1446 
 1447     j++;
 1448     }
 1449 
 1450     return err;
 1451 }
 1452 
 1453 static int get_sheet_dimensions (wbook *book, xls_info *xi, PRN *prn)
 1454 {
 1455     char *blanks = NULL;
 1456     int i, j;
 1457 
 1458     /* trim any trailing blank rows */
 1459     for (i=xi->nrows-1; i>=0; i--) {
 1460     if (xi->rows[i].cells == NULL) {
 1461         xi->nrows -= 1;
 1462     } else {
 1463         break;
 1464     }
 1465     }
 1466 
 1467     for (i=0; i<xi->nrows; i++) {
 1468     if (xi->rows[i].cells != NULL) {
 1469         if (xi->rows[i].last + 1 > xi->totcols) {
 1470         xi->totcols = xi->rows[i].last + 1;
 1471         }
 1472     }
 1473     }
 1474 
 1475     if (xi->totcols <= 0 || xi->nrows < 1) {
 1476     pputs(prn, _("No data found.\n"));
 1477     pputs(prn, _(adjust_rc));
 1478     return 1;
 1479     }
 1480 
 1481     blanks = malloc(xi->totcols);
 1482     if (blanks == NULL) {
 1483     return E_ALLOC;
 1484     }
 1485 
 1486     memset(blanks, 1, xi->totcols);
 1487 
 1488     for (i=0; i<xi->nrows; i++) {
 1489     if (xi->rows[i].cells == NULL) {
 1490         continue;
 1491     }
 1492     for (j=0; j<=xi->rows[i].last; j++) {
 1493         if (xi->rows[i].cells[j] != NULL) {
 1494         if (blanks[j]) {
 1495             blanks[j] = 0;
 1496         }
 1497         }
 1498     }
 1499     }
 1500 
 1501     for (i=0; i<xi->totcols; i++) {
 1502     if (!blanks[i]) {
 1503         xi->datacols += 1;
 1504     }
 1505     }
 1506 
 1507     if (book_numeric_dates(book)) {
 1508     xi->datacols -= 1;
 1509     }
 1510 
 1511     printf("rows=%d, total cols=%d, data cols=%d\n", xi->nrows,
 1512        xi->totcols, xi->datacols);
 1513 
 1514     if (xi->datacols < 1) {
 1515     pputs(prn, _("No data found.\n"));
 1516     pputs(prn, _(adjust_rc));
 1517     free(blanks);
 1518     return 1;
 1519     }
 1520 
 1521     xi->blank_col = blanks;
 1522 
 1523     return 0;
 1524 }
 1525 
 1526 static int col0_is_numeric (xls_info *xi, int row_offset, int col_offset)
 1527 {
 1528     int t, tstart = 1 + row_offset;
 1529     int nx = 0;
 1530     char *test;
 1531 
 1532     fprintf(stderr, "testing for all numerical values in col %d\n",
 1533         col_offset);
 1534 
 1535     for (t=tstart; t<xi->nrows; t++) {
 1536     test = xi->rows[t].cells[col_offset];
 1537     if (!numeric_string(test)) {
 1538         fprintf(stderr, " no: non-numeric cell at row %d\n", t + 1);
 1539         return 0;
 1540     } else if (test != NULL && *test != '\0') {
 1541         nx++;
 1542     }
 1543     }
 1544 
 1545     return nx > 0;
 1546 }
 1547 
 1548 static int alpha_cell (const char *s)
 1549 {
 1550     if (s != NULL) {
 1551     if (*s == '"' || *s == '\'') s++;
 1552     return isalpha(*s);
 1553     }
 1554 
 1555     return 0;
 1556 }
 1557 
 1558 static void book_time_series_setup (wbook *book, DATASET *newinfo, int pd)
 1559 {
 1560     newinfo->pd = pd;
 1561     newinfo->structure = TIME_SERIES;
 1562 
 1563     fprintf(stderr, "stobs='%s'\n", newinfo->stobs);
 1564     newinfo->sd0 = get_date_x(newinfo->pd, newinfo->stobs);
 1565     fprintf(stderr, "sd0=%g\n", newinfo->sd0);
 1566 
 1567     book_set_time_series(book);
 1568     book_unset_obs_labels(book);
 1569 }
 1570 
 1571 /* Make a contiguous array of observation labels for the
 1572    purpose of checking for dated observations. All we need
 1573    here is a "shell"; the actual strings are already
 1574    allocated.
 1575 */
 1576 
 1577 static char **labels_array (xls_info *xi, int row_offset, int j,
 1578                 DATASET *newset)
 1579 {
 1580     char *s, **labels = NULL;
 1581     int i, t, ok = 1;
 1582 
 1583     for (t=0; t<newset->n; t++) {
 1584     i = t + row_offset;
 1585     s = xi->rows[i].cells != NULL ? xi->rows[i].cells[j] : NULL;
 1586     if (s == NULL || *s == '\0') {
 1587         ok = 0;
 1588         break;
 1589     }
 1590     }
 1591 
 1592     if (ok) {
 1593     labels = malloc(newset->n * sizeof *labels);
 1594     if (labels != NULL) {
 1595         for (t=0; t<newset->n; t++) {
 1596         i = t + row_offset;
 1597         labels[t] = xi->rows[i].cells[j];
 1598         }
 1599     }
 1600     }
 1601 
 1602     return labels;
 1603 }
 1604 
 1605 static void maybe_revise_xls_codelist (xls_info *xi)
 1606 {
 1607     if (xi->codelist != NULL) {
 1608     int i;
 1609 
 1610     for (i=1; i<=xi->codelist[0]; i++) {
 1611         xi->codelist[i] += 1;
 1612     }
 1613     }
 1614 }
 1615 
 1616 int xls_get_data (const char *fname, int *list, char *sheetname,
 1617           DATASET *dset, gretlopt opt, PRN *prn)
 1618 {
 1619     int gui = (opt & OPT_G);
 1620     wbook xbook;
 1621     wbook *book = &xbook;
 1622     xls_info xlsi;
 1623     xls_info *xi = &xlsi;
 1624     DATASET *newset;
 1625     struct string_err strerr;
 1626     int ts_markers = 0;
 1627     int missvals = 0;
 1628     char **ts_S = NULL;
 1629     int r0, c0;
 1630     int merge = (dset->Z != NULL);
 1631     int i, t, pd = 0;
 1632     int err = 0;
 1633 
 1634     newset = datainfo_new();
 1635     if (newset == NULL) {
 1636     pputs(prn, _("Out of memory\n"));
 1637     return 1;
 1638     }
 1639 
 1640     if (sheetname != NULL) {
 1641     fprintf(stderr, "xls_get_data: sheetname='%s'\n", sheetname);
 1642     }
 1643 
 1644     gretl_push_c_numeric_locale();
 1645 
 1646     wbook_init(book, list, sheetname);
 1647     xls_info_init(xi);
 1648 
 1649     if (excel_book_get_info(fname, book)) {
 1650     pputs(prn, _("Failed to get workbook info"));
 1651     err = 1;
 1652     } else if (book->nsheets == 0) {
 1653     pputs(prn, _("No worksheets found"));
 1654     err = 1;
 1655     } else {
 1656     wbook_print_info(book);
 1657     }
 1658 
 1659     if (!err) {
 1660     if (gui) {
 1661         wsheet_menu(book, book->nsheets > 1);
 1662         if (book_debugging(book)) {
 1663         debug_print = 1;
 1664         print_version();
 1665         }
 1666     } else {
 1667         err = wbook_check_params(book);
 1668         if (err) {
 1669         gretl_errmsg_set(_("Invalid argument for worksheet import"));
 1670         }
 1671     }
 1672     }
 1673 
 1674     dbprintf("sheet selected=%d; import offsets: col=%d, row=%d\n",
 1675          book->selected, book->col_offset, book->row_offset);
 1676 
 1677     if (book->selected == -1) {
 1678     /* canceled */
 1679     err = -1;
 1680     }
 1681 
 1682     if (err) goto getout;
 1683 
 1684     /* processing for specific worksheet */
 1685     err = process_sheet(fname, book, xi, prn);
 1686 
 1687     if (err) {
 1688     const char *buf = gretl_print_get_buffer(prn);
 1689 
 1690     if (*buf == 0) {
 1691         pputs(prn, _("Failed to process Excel file"));
 1692         buf = gretl_print_get_buffer(prn);
 1693     }
 1694     fprintf(stderr, "%s\n", buf);
 1695     goto getout;
 1696     }
 1697 
 1698     /* get sizes and locate any blank columns */
 1699     err = get_sheet_dimensions(book, xi, prn);
 1700 
 1701     if (err) goto getout;
 1702 
 1703     /* check feasibility of offsets */
 1704     if (book->row_offset >= xi->nrows) {
 1705     pputs(prn, _("Starting row is out of bounds.\n"));
 1706     err = 1;
 1707     } else if (book->col_offset >= xi->totcols) {
 1708     pputs(prn, _("Starting column is out of bounds.\n"));
 1709     err = 1;
 1710     }
 1711 
 1712     if (err) goto getout;
 1713 
 1714     if (first_col_strings(book, xi)) {
 1715     puts("found label strings in first imported column");
 1716     } else if (book_numeric_dates(book)) {
 1717     puts("found calendar dates in first imported column");
 1718     } else {
 1719     puts("check for label strings in first imported column: not found");
 1720     }
 1721 
 1722     /* any bad or missing variable names? */
 1723     err = check_all_varnames(book, xi, prn);
 1724 
 1725     if (err == VARNAMES_NULL || err == VARNAMES_NOTSTR) {
 1726     pputs(prn, _("One or more variable names are missing.\n"));
 1727     pputs(prn, _(adjust_rc));
 1728     } else if (err == VARNAMES_NONE) {
 1729     pputs(prn, _("it seems there are no variable names\n"));
 1730     book_set_auto_varnames(book);
 1731     book->row_offset -= 1;
 1732     err = 0;
 1733     }
 1734 
 1735     if (err) goto getout;
 1736 
 1737     /* any bad data? */
 1738     err = check_data_block(book, xi, &missvals, &strerr);
 1739 
 1740     if (err) {
 1741     pprintf(prn, _("Expected numeric data, found string:\n"
 1742                "%s\" at row %d, column %d\n"),
 1743         strerr.str, strerr.row, strerr.column);
 1744     g_free(strerr.str);
 1745     pputs(prn, _(adjust_rc));
 1746     goto getout;
 1747     } else if (missvals) {
 1748     pputs(prn, _("Warning: there were missing values\n"));
 1749     }
 1750 
 1751     r0 = book->row_offset;
 1752     c0 = book->col_offset;
 1753     newset->n = xi->nrows - 1 - r0;
 1754 
 1755     if (book_numeric_dates(book) ||
 1756     (!book_auto_varnames(book) && import_obs_label(xls_cell(xi, r0, c0)))) {
 1757     char **labels = labels_array(xi, r0 + 1, c0, newset);
 1758 
 1759     if (labels != NULL) {
 1760         pd = importer_dates_check(labels, &book->flags, newset, prn, &err);
 1761         free(labels);
 1762     }
 1763 
 1764     if (pd > 0) {
 1765         /* got time-series info from dates/labels */
 1766         book_time_series_setup(book, newset, pd);
 1767         ts_markers = newset->markers;
 1768         ts_S = newset->S;
 1769     } else if (!book_numeric_dates(book) &&
 1770            alpha_cell(xls_cell(xi, r0, c0)) &&
 1771            col0_is_numeric(xi, r0, c0)) {
 1772         book_unset_obs_labels(book);
 1773         maybe_revise_xls_codelist(xi);
 1774     }
 1775     }
 1776 
 1777     /* dimensions of the dataset */
 1778     newset->v = n_vars_from_col(book, xi->totcols, xi->blank_col);
 1779     fprintf(stderr, "newset->v = %d, newset->n = %d\n",
 1780         newset->v, newset->n);
 1781 
 1782     /* create import dataset */
 1783     err = worksheet_start_dataset(newset);
 1784     if (err) {
 1785     goto getout;
 1786     }
 1787 
 1788     if (book_time_series(book)) {
 1789     newset->markers = ts_markers;
 1790     newset->S = ts_S;
 1791     } else {
 1792     dataset_obs_info_default(newset);
 1793     }
 1794 
 1795     /* OK: actually populate the dataset */
 1796     err = transcribe_data(book, xi, newset, prn);
 1797     if (err) {
 1798     goto getout;
 1799     }
 1800 
 1801     if (fix_varname_duplicates(newset)) {
 1802     pputs(prn, _("warning: some variable names were duplicated\n"));
 1803     }
 1804 
 1805     if (book_obs_labels(book)) {
 1806     dataset_allocate_obs_markers(newset);
 1807     if (newset->S != NULL) {
 1808         i = book->col_offset;
 1809         for (t=0; t<newset->n; t++) {
 1810         int ts = t + 1 + book->row_offset;
 1811         char *src = xls_cell(xi, ts, i);
 1812 
 1813         if (src != NULL) {
 1814             gretl_utf8_strncat_trim(newset->S[t], src + 1, OBSLEN - 1);
 1815         }
 1816         }
 1817     }
 1818     }
 1819 
 1820     if (book->flags & BOOK_DATA_REVERSED) {
 1821     reverse_data(newset, prn);
 1822     }
 1823 
 1824     if (!err && xi->st != NULL) {
 1825     err = gretl_string_table_validate(xi->st);
 1826     if (err) {
 1827         pputs(prn, A_("Failed to interpret the data as numeric\n"));
 1828     } else {
 1829         gretl_string_table_print(xi->st, newset, fname, prn);
 1830     }
 1831     }
 1832 
 1833     err = merge_or_replace_data(dset, &newset, get_merge_opts(opt), prn);
 1834 
 1835     if (!err && !merge) {
 1836     dataset_add_import_info(dset, fname, GRETL_XLS);
 1837     }
 1838 
 1839     if (!err && gui) {
 1840     wbook_record_params(book, list);
 1841     }
 1842 
 1843  getout:
 1844 
 1845     free_xls_info(xi);
 1846     wbook_free(book);
 1847     gretl_pop_c_numeric_locale();
 1848 
 1849     if (newset != NULL) {
 1850     destroy_dataset(newset);
 1851     }
 1852 
 1853     return err;
 1854 }