"Fossies" - the Fresh Open Source Software Archive

Member "statist-1.4.2/src/data.c" (21 Oct 2006, 48016 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "data.c", see the Fossies "Dox" file reference documentation.

    1 /* This file is part of statist
    2 **
    3 ** It is distributed under the GNU General Public License.
    4 ** See the file COPYING for details.
    5 **
    6 ** (c) 1997 Dirk Melcher
    7 ** old email address: Dirk.Melcher@usf.Uni-Osnabrueck.DE
    8 **
    9 ** adapted for statistX: Andreas Beyer, 1999, abeyer@usf.uni-osnabrueck.de
   10 **
   11 ** The function get_line() was adapted from GNU coreutils 5.2.1 getndelim2.c
   12 ** Copyright (C) 1993, 1996, 1997, 1998, 2000, 2003 Free Software
   13 ** Foundation, Inc.
   14 **
   15 **  published by Bernhard Reiter  http://www.usf.Uni-Osnabrueck.DE/~breiter
   16 **  $Id: data.c,v 1.44 2006/10/21 23:15:22 jakson Exp $
   17 ***************************************************************/
   18 
   19 /* data.c for STATIST */
   20 #include <stdio.h>
   21 #include <string.h>
   22 #include <ctype.h>
   23 #include <math.h>
   24 #include <stdlib.h>
   25 #include <time.h>
   26 
   27 #include "statist.h"
   28 #include "data.h"
   29 #include "funcs.h"
   30 #include "menue.h"
   31 
   32 #include "gettext.h"
   33 
   34 /* ==================================================================== */
   35 
/* Column positions recorded by parsecomment() for header tokens that start
 * with '$' (label columns); the array is sized together with the other
 * per-column arrays in inflate_MCOL(). */
static short int *labelcol;
/* Number of entries of labelcol currently in use. */
static short int n_lab;
   38 
   39 
   40 /* ==================================================================== */
   41 
   42 void inflate_MCOL(){
   43   int i, oldmax = MCOL;
   44   MCOL += 64;
   45   xx = (PREAL*)myrealloc(xx, (MCOL * sizeof(PREAL)));
   46   nn = (int*)myrealloc(nn, (MCOL * sizeof(int)));
   47   vn = (int*)myrealloc(vn, (MCOL * sizeof(int)));
   48   acol = (int*)myrealloc(acol, (MCOL * sizeof(int)));
   49   x_read = (BOOLEAN*)myrealloc(x_read, (MCOL * sizeof(BOOLEAN)));
   50   alias = (char**)myrealloc(alias, (MCOL * sizeof(char*)));
   51   tmpptr = (FILE**)myrealloc(tmpptr, (MCOL * sizeof(FILE*)));
   52   labelcol = (short int*)myrealloc(labelcol, (MCOL * sizeof(short int)));
   53   names = (Labels**)myrealloc(names, (MCOL * sizeof(Labels*)));
   54 
   55    for (i = oldmax; i < MCOL; i++){
   56      xx[i] = NULL;
   57      nn[i] = 0;
   58      vn[i] = 0;
   59      acol[i] = 0;
   60      x_read[i] = FALSE;
   61      alias[i] = NULL;
   62      tmpptr[i] = NULL;
   63      alias[i] = NULL;
   64      names[i] = NULL;
   65    }
   66 }
   67 
   68 char * get_default_label(int i){
   69   char * newlabel;
   70   char * tempLabel = (char*)mycalloc(4, sizeof(char));
   71   if((i+CHAR_OFFSET) <= 'z'){
   72     tempLabel[0] = (char)(i+CHAR_OFFSET);
   73   } else{
   74     tempLabel[0] = (char)(((i + 26) / 26) + CHAR_OFFSET - 2);
   75     tempLabel[1] = (char)((i  % 26) + CHAR_OFFSET);
   76   }
   77   newlabel = (char*)mymalloc(sizeof(char) * (strlen(tempLabel) + 1));
   78   strcpy(newlabel, tempLabel);
   79   myfree(tempLabel);
   80   return(newlabel);
   81 }
   82 
   83 void create_columns(int amount){
   84   int i;
   85   for(i = 0; i < amount; i++){
   86     if(ncol == MCOL)
   87       inflate_MCOL();
   88     if(alias[ncol] == NULL)
   89       alias[ncol] = get_default_label(ncol);
   90     tmpptr[ncol] = tmpfile();
   91     if(tmpptr[ncol] == NULL){
   92       out_err(FAT, ERR_FILE, ERR_LINE, 
   93       _("System reports error while opening temporary file:\n  \"%s\""),
   94       STRERROR(errno));
   95     }
   96     ncol++;
   97   }
   98 }
   99 
  100 /* Free allocated memory, but not the temporary file */
  101 void free_column(int i){
  102   if((x_read[i])){
  103     myfree(xx[i]);
  104     xx[i] = NULL;
  105     x_read[i] = FALSE;
  106     vn[i] = 0;
  107   }
  108 }
  109 
  110 /* Free allocated memory and erase temporary file */
  111 void delete_column(int i){
  112   free_column(i);
  113   if(alias[i]) {
  114     myfree(alias[i]);
  115     alias[i] = NULL;
  116   }
  117   names[i] = NULL;
  118   if(tmpptr[i]){
  119     FCLOSE(tmpptr[i]);
  120     tmpptr[i] = NULL;
  121   }
  122   nn[i] = 0;
  123   x_read[i] = FALSE;
  124   labelcol[i] = 0;
  125   ncol--;
  126 }
  127 
  128 void erasetempfiles() {
  129    int i;
  130    if(MCOL == 0)
  131      return;
  132    out_d(_("Removing temporary files ...") );
  133    for(i = 0; i < MCOL; i++)
  134      delete_column(i);
  135    myfree(xx);
  136    myfree(alias);
  137    myfree(nn);
  138    myfree(vn);
  139    myfree(acol);
  140    myfree(x_read);
  141    myfree(tmpptr);
  142    myfree(labelcol);
  143    myfree(names);
  144    xx = NULL;
  145    alias = NULL;
  146    nn = NULL;
  147    vn = NULL;
  148    acol = NULL;
  149    x_read = NULL;
  150    tmpptr = NULL;
  151    labelcol = NULL;
  152    names = NULL;
  153    out_d(_(" done\n") );
  154    n_lab = 0;
  155    ncol = 0;
  156    MCOL = 0;
  157  }
  158 
  159 
  160 /* ==================================================================== */
  161 
  162 
  163 /* Adapted from GNU coretutils 5.2.1 getndelim2.c */
  164 int get_line(char **lineptr, size_t *linesize, FILE *stream){
  165   register int c;
  166   int pos = -1; /* index of last byte read */
  167   char * line = *lineptr;
  168   size_t max = *linesize - 1;
  169   for(;;){
  170     c = getc (stream);
  171     if(c == EOF){
  172       /* Return partial line, if any.  */
  173       if (pos == -1)
  174     return -1;
  175       else
  176     break;
  177     }
  178     if(pos == max){
  179       max += 64;
  180       *linesize += 64;
  181       *lineptr = myrealloc(*lineptr, *linesize);
  182       line = *lineptr;
  183     }
  184     pos++;
  185     line[pos] = c;
  186     if (c == '\n')
  187       /* Return the line.  */
  188       break;
  189   }
  190   pos++;
  191   line[pos] = '\0';
  192   pos++;
  193   return pos;
  194 }
  195 
  196 /* =================================================================== */
  197 
  198 void attach_labels_to_columns(){
  199   int i;
  200   Labels *ptr = first_labels;
  201   for(i = 0; i < ncol; i++)
  202     names[i] = NULL;
  203   while(ptr){
  204     for(i = 0; i < ncol; i++){
  205       if(strcmp(alias[i], ptr->clabel) == 0){
  206     names[i] = ptr;
  207     break;
  208       }
  209     }
  210     ptr = ptr->next;
  211   }
  212 }
  213 
  214 void delete_labels(Labels *ptr){
  215   int i;
  216   Labels *p;
  217   if(ptr == first_labels){
  218     first_labels = first_labels->next;
  219   } else{
  220     p = first_labels;
  221     while(p->next != ptr)
  222       p = p->next;
  223     p->next = ptr->next;
  224   }
  225   if(ptr->clabel)
  226     myfree(ptr->clabel);
  227   if(ptr->ctitle)
  228     myfree(ptr->ctitle);
  229   if(ptr->n > 0){
  230     for(i = 0; i < ptr->n; i++)
  231       myfree(ptr->l[i]);
  232   }
  233   if(ptr->v)
  234     myfree(ptr->v);
  235   if(ptr->l)
  236     myfree(ptr->l);
  237   myfree(ptr);
  238 }
  239 
  240 /* Delete problematic Labels */
  241 void check_labels(){
  242   Labels *next, *ptr = first_labels;
  243   while(ptr){
  244     next = ptr->next;
  245     if(ptr->clabel == NULL || (ptr->ctitle == NULL && ptr->n == 0))
  246       delete_labels(ptr);
  247     ptr = next;
  248   }
  249 }
  250 
  251 /* Creates a linked list of "Labels". The list of Labels might have labels for
  252  * columns that don't exist in the current datafile. */
  253 void read_labels(char *labelsfile){
  254   BOOLEAN getting_labels = FALSE;
  255   Labels *ptr = NULL;
  256   FILE *F;
  257   int i, max = 0;
  258   char b[255], *s, t[255];
  259   FOPEN(labelsfile, "r", F);
  260   while(fgets(b, 254, F)){
  261     s = b;
  262     while(s[0] == ' ' || s[0] == '\t')
  263       s++;
  264     if(strlen(s) < 2){
  265       if(getting_labels){
  266     getting_labels = FALSE;
  267       }
  268       continue;
  269     }
  270     if(s[0] == '#')
  271       continue;
  272     i = 0;
  273     if(!getting_labels){
  274       if(ptr == NULL){
  275     first_labels = (Labels*)mycalloc(1, sizeof(Labels));
  276     ptr = first_labels;
  277       } else{
  278     ptr->next = (Labels*)mycalloc(1, sizeof(Labels));
  279     ptr = ptr->next;
  280       }
  281       while(!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')){
  282     t[i] = s[i];
  283     i++;
  284       }
  285       t[i] = 0;
  286       ptr->clabel = (char*)mymalloc((strlen(t) + 1) * sizeof(char));
  287       strcpy(ptr->clabel, t);
  288       s += i;
  289       while(s[0] == ' ' || s[0] == '\t')
  290     s++;
  291       i = 0;
  292       while(!(s[i] == '\n' || s[i] == '\r')){
  293     t[i] = s[i];
  294     i++;
  295       }
  296       t[i] = 0;
  297       if(strlen(t) > 1){
  298     ptr->ctitle = (char*)mymalloc((strlen(t) + 1) * sizeof(char));
  299     strcpy(ptr->ctitle, t);
  300       }
  301       max = 0;
  302       ptr->n = 0;
  303       getting_labels = TRUE;
  304     } else{
  305       if(ptr->n == max){
  306     max += 10;
  307     ptr->v = (REAL*)myrealloc(ptr->v, (max * sizeof(REAL)));
  308     ptr->l = (char**)myrealloc(ptr->l, (max * sizeof(char*)));
  309       }
  310       while(!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')){
  311     t[i] = s[i];
  312     i++;
  313       }
  314       t[i] = 0;
  315       if(sscanf(t, "%lf", &(ptr->v[ptr->n])) == 1){
  316     s += i;
  317     while(s[0] == ' ' || s[0] == '\t')
  318       s++;
  319     i = 0;
  320     while(!(s[i] == '\n' || s[i] == '\r')){
  321       t[i] = s[i];
  322       i++;
  323     }
  324     t[i] = 0;
  325     i = strlen(t);
  326     if(i > 1){
  327       ptr->l[ptr->n] = (char*)mymalloc((i+1) * sizeof(char));
  328       strcpy(ptr->l[ptr->n], t);
  329       ptr->n++;
  330     } else
  331       continue;
  332       } else
  333     continue;
  334     }
  335   }
  336   FCLOSE(F);
  337   check_labels();
  338   if(first_labels == NULL)
  339     out_err(ERR, ERR_FILE, ERR_LINE,
  340     _("No labels found in \"%s\"!"), labelsfile);
  341   else
  342     attach_labels_to_columns();
  343 }
  344 
  345 void set_fileformat(){
  346   char answer[80];
  347   int status = 1;
  348 
  349   out_i(_("Does the file contain the column names? (%s) "), _("y/n"));
  350   GETRLINE;
  351   status = sscanf(line, "%s", answer);
  352   if (status == 0)
  353     return;
  354   if(answer[0] == _("y")[0] || answer[0] == _("Y")[0]){
  355     has_header = TRUE;
  356     noheader = FALSE;
  357   } else
  358     /* FIXME: can't translate "N" because this string is already used in
  359      * Frequency table and Compare means */
  360     if(answer[0] == _("n")[0] || answer[0] == 'N'){
  361       has_header = FALSE;
  362       noheader = TRUE;
  363     } else
  364       return;
  365 
  366   do{
  367   out_i(_("Decimal delimiter [%c]: "), dec);
  368   GETBLINE;
  369   status = sscanf(line, "%s\n", answer);
  370   if (status == 0)
  371     return;
  372   if(answer[0] == ',' || answer[0] == '.')
  373     dec = answer[0];
  374   else
  375     out_err(WAR, ERR_FILE, ERR_LINE,
  376     _("Invalid decimal delimiter: '%c'. Please, choose either ',' or '.'"), answer[0]);
  377   } while(!(answer[0] == ',' || answer[0] == '.'));
  378 
  379   do{
  380     if(sep){
  381       if(sep == '\t')
  382     strcpy(answer, "\\t");
  383       else
  384     sprintf(answer, "%c", sep);
  385     } else
  386       sprintf(answer, " ,;\\t");
  387     out_i(_("Field separator ( \\t,;)[%s]: "), answer);
  388     GETBLINE;
  389     status = sscanf(line, "%s\n", answer);
  390     if(line[0] == ' ')
  391       strcpy(answer, " ");
  392     if (status == 0)
  393       return;
  394     if(answer[0] == ','  || answer[0] == ';' || answer[0] == ' ')
  395       sep = answer[0];
  396     else
  397       if(strcmp(answer, "\\t") == 0)
  398     sep = '\t';
  399       else{
  400     out_err(WAR, ERR_FILE, ERR_LINE, _("Invalid field separator: '%c'"), answer[0]);
  401     status = 0;
  402       }
  403   } while(status == 0);
  404 
  405   out_i(_("What string indicates missing values? [%s]: "), NODATA);
  406   GETNLINE;
  407   if(!empty){
  408     status = sscanf(line, "%s\n", answer);
  409     if (status == 0)
  410       return;
  411     myfree(NODATA);
  412     NODATA = (char*) mymalloc((strlen(answer) + 1) * sizeof(char));
  413     strcpy(NODATA, answer);
  414   }
  415 }
  416 
  417 void show_file_head(char *fn){
  418   FILE *source;
  419   char *aline;
  420   char *b;
  421   int i, rlen;
  422   size_t blen=64;
  423 
  424   FOPEN(fn, "rt", source);
  425   aline = (char*)mymalloc(blen);
  426   rlen = get_line(&aline, &blen, source);
  427   i = 0;
  428 
  429   out_i(_("First lines of \"%s\":"), fn);
  430   out_d("\n\n");
  431   set_winsize();
  432   b = (char*)m_calloc(SCRCOLS + 4, sizeof(char));
  433   while (rlen > -1 && i < 10){
  434     snprintf(b, SCRCOLS, "%s", aline);
  435     b[SCRCOLS - 3] = '\n';
  436     b[SCRCOLS - 2] = 0;
  437     out_d(" %s", b);
  438     rlen = get_line(&aline, &blen, source);
  439     i++;
  440   }
  441   out_d("\n\n");
  442   FCLOSE(source);
  443   myfree(aline);
  444 }
  445 
  446 void remove_quotes(char *s){
  447   int i = 0, j = 0, l;
  448   l = strlen(s);
  449   while(i < l){
  450     if(s[i] != '"'){
  451       s[j] = s[i];
  452       j++;
  453     }
  454     i++;
  455   }
  456   s[j] = 0;
  457 }
  458 
  459 int parsecomment(const char *theline, BOOLEAN is_comment) {
  460   int new = 0, j, n, is_rpt;
  461   char *s, *token, *comment;
  462   char ignore[] = " ,;\"\n\t";
  463   char *var_id;
  464   
  465   if(is_comment){
  466     var_id = (char*)mymalloc(sizeof(char) * 3);
  467     strcpy(var_id, "#%");/* this char indicates the line contains labels */
  468   } else{
  469     var_id = (char*)m_calloc(1, 1);
  470     var_id[0] = 0;
  471   }
  472 
  473   comment = (char*)m_calloc(sizeof(char), (strlen(theline) + 1));
  474   strcpy(comment, theline);
  475 
  476   if (strstr(comment, var_id)!=comment) { /* no valid Var - comment */
  477     if(strstr(comment, "#!")==comment && strcmp(var_id,"#!")!=0 )
  478       out_err(WAR, ERR_FILE, ERR_LINE,
  479       _("'#!' is an illegal indicator of a column of labels.") );
  480     return -1;
  481   }
  482   s = comment+strlen(var_id);  /* jump over var_id */
  483   n = ncol;
  484   while ((token = strtok(s, ignore))!= NULL) {
  485     s = NULL;
  486     if (token[0]=='$') {
  487       if(labelcol == NULL)
  488     inflate_MCOL();
  489       labelcol[n_lab] = n;
  490       n_lab++;
  491       out_d(_("Label in column %i='%s'\n"), (n+1), token);
  492     }
  493     else {
  494       if(n >= MCOL)
  495     inflate_MCOL();
  496       if(alias[n] != NULL)
  497     myfree(alias[n]);
  498       is_rpt = FALSE; /* avoiding repeated labels */
  499       for (j=0; j <n; j++)
  500     if (strcmp(alias[j], token) == 0)
  501       is_rpt = TRUE;
  502       if (is_rpt){
  503     alias[n] = (char*) mymalloc((strlen(token)+2) * sizeof(char));
  504     strcpy(alias[n], token);
  505     alias[n][strlen(token)] = '_';
  506     alias[n][strlen(token)+1] = 0;
  507       }
  508       else{
  509     alias[n] = (char*) mymalloc((strlen(token)+1) * sizeof(char));
  510     strcpy(alias[n], token);
  511       }
  512       n++;
  513       new++;
  514     }
  515   }
  516 
  517   if (new == 0 && is_comment) {
  518     if(silent)
  519       out_err(FAT, ERR_FILE, ERR_LINE, _("No variables found in comment!"));
  520     out_err(ERR, ERR_FILE, ERR_LINE, _("No variables found in comment!"));
  521   }
  522 
  523   return new; /* number of new labels (not counting number of $labels) */
  524 }
  525 
  526 /* ==================================================================== */
  527 
  528 void delete_last_columns(int i){
  529   int j;
  530   for(j = 0; j < i; j++){
  531     delete_column(ncol - 1);
  532   }
  533 }
  534 
  535 void put_dots(char * s, char c){
  536   while(*s){
  537     if(*s == c)
  538       *s = '.';
  539     s++;
  540   }
  541 }
  542 
  543 void clean_the_line(char *s){
  544   char *b = s;
  545   int i = 0;
  546   while(*s){
  547     if(*s == '"')
  548       i = !i;
  549     if(i && *s == ',')
  550       *s = '.';
  551     s++;
  552   }
  553 
  554   /* If (dec == ',' && sep == ','), the data is quoted and, thus, there is no
  555    * need of replacing commas with dots because this was already done. */
  556   if(dec == ',' && sep != ',')
  557     put_dots(b, dec);
  558   /* If sep was defined, and the missing values are quoted, the token will be
  559    * "\"M\"" */
  560   if(sep)
  561     remove_quotes(b);
  562 }
  563 
  564 void finish_readingsource(FILE *F, char *s, BOOLEAN try_again){
  565   FCLOSE(F);
  566   myfree(s);
  567 #ifndef NO_GETTEXT
  568   RESET_LOCALE;
  569 #endif
  570 
  571   /* If ask_fileformat == TRUE, the user was already asked about the file format */
  572   char answer[80];
  573   static int attempts = 0;
  574   if(try_again && !silent && !ask_fileformat && attempts < 3){
  575     out_d(_("Statist failed to open the file. Perhaps it\n"
  576       "didn't detect the file format correctly.\n"));
  577     out_i(_("Would you like to set the file format? (%s) "), _("Y/n"));
  578     GETNLINE;
  579     out_d("\n");
  580     if(!empty){
  581       sscanf(line, "%s", answer);
  582       if(answer[0] == _("n")[0] || answer[0] == 'N'){
  583     attempts = 0;
  584     return;
  585       }
  586     }
  587     attempts++;
  588     show_file_head(sourcename);
  589     set_fileformat();
  590     readsourcefile();
  591   }
  592   attempts = 0;
  593 }
  594 
  595 void readsourcefile(){
  596   FILE *source;
  597   char *aline; /* current line, old line */
  598   int i, j, newlabs=0, newcol=0, actcol, colread = 0, lread=0, i_lab=0, rlen;
  599   size_t blen=64;
  600   REAL  test;
  601   char ignore[]= " ,;\"\n\t\0", *ptr, *token = NULL;
  602   BOOLEAN statist_labels = FALSE, header_OK = FALSE;
  603 #ifndef NO_GETTEXT
  604   SET_C_LOCALE;
  605 #endif
  606 
  607   if(ask_fileformat){
  608     show_file_head(sourcename);
  609     set_fileformat();
  610   }
  611 
  612   FOPEN(sourcename, "rt", source);
  613 
  614   aline = (char*)mymalloc(blen);
  615   rlen = get_line(&aline, &blen, source);
  616   lread ++;
  617   if(noheader){ /* Don't scan the first lines looking for a header */
  618     while (rlen > -1 && (emptyline(aline) || aline[0] == COMMENT)){
  619       rlen = get_line(&aline, &blen, source);
  620       lread ++;
  621     }
  622   } else{
  623 
  624     /* Seek column labels, skipping true commentaries and empty lines */
  625     while (rlen > -1 && (emptyline(aline) || aline[0] == COMMENT)){
  626       if(aline[0] == COMMENT && (aline[1] == '%' || aline[1] == '!')){
  627     newlabs = parsecomment(aline, TRUE);
  628     if(newlabs)
  629       statist_labels = TRUE;
  630       }
  631       rlen = get_line(&aline, &blen, source);
  632       lread ++;
  633     }
  634     if(rlen == -1){
  635       if(silent){
  636     out_err(FAT, ERR_FILE, ERR_LINE,
  637         _("Couldn't find data in file \"%s\"!"), sourcename);
  638       } else{
  639     out_err(ERR, ERR_FILE, ERR_LINE,
  640         _("Couldn't find data in file \"%s\"!"), sourcename);
  641     finish_readingsource(source, aline, FALSE);
  642     return;
  643       }
  644     }
  645 
  646     /* Read the first line as if it contains the column names because the user
  647      * passed the command line option --header */
  648     if(!statist_labels && has_header){
  649       newlabs = (parsecomment(aline, FALSE));
  650       if(newlabs > 0)
  651     header_OK = TRUE;
  652       for(i = 0; i < newlabs; i++){
  653     j = i + ncol;
  654     if(!((alias[j][0] >= 'A' && alias[j][0] <= 'Z') 
  655           || (alias[j][0] >= 'a' && alias[j][0] <= 'z'))){
  656       out_err(WAR, ERR_FILE, ERR_LINE,
  657           _("Name of column %d doesn't begin with an ascii letter: \"%s\"."),
  658           j+1, alias[j]);
  659       break;
  660     }
  661       }
  662     } else{
  663       /* Read the first line as if it contains the column names, but drop the
  664        * names if they don't appear to be valid ones. */
  665       if(!statist_labels && detect_header){
  666     newlabs = (parsecomment(aline, FALSE));
  667     if(newlabs > 0){
  668       header_OK = TRUE;
  669       for(i = 0; i < newlabs; i++){
  670         j = i + ncol;
  671         if(!((alias[j][0] >= 'A' && alias[j][0] <= 'Z') 
  672           || (alias[j][0] >= 'a' && alias[j][0] <= 'z'))){
  673           for(j = ncol; j < (ncol+newlabs); j++){
  674         myfree(alias[j]);
  675         alias[j] = NULL;
  676           }
  677           header_OK = FALSE;
  678           newlabs = 0;
  679           break;
  680         }
  681       }
  682     }
  683       }
  684     }
  685 
  686     if(newlabs > 0){
  687       for(i = ncol; i < (ncol+newlabs); i++)
  688     out_d(_("Column %i = %s\n"), i+1, alias[i]);
  689     }
  690 
  691     /* Read another line if we successfully read the column names, although
  692      * the data file doesn't have the "#%" string. */
  693     if(header_OK){
  694       rlen = get_line(&aline, &blen, source);
  695       lread ++;
  696       if(verbose && detect_header && !has_header){
  697     out_d("\n");
  698     if(newlabs == 1)
  699       out_d(_("One valid column name found!"));
  700     else
  701       out_d(_("Valid column names found!"));
  702     out_d("\n\n");
  703       }
  704     }
  705   } /* End of "if(noheader)" */
  706 
  707   /* Parse the first line of data, but don't put the data in the temp files
  708      yet. Just count the number of columns. */
  709   char *linecopy = (char*)m_calloc(2, blen);
  710   strcpy(linecopy, aline);
  711   clean_the_line(linecopy);
  712   
  713   ptr = linecopy;
  714   if(sep){  /* A field separator was defined. */
  715     newcol = 1;
  716     j = strlen(ptr);
  717     for(i = 0; i < j; i++)
  718       if(ptr[i] == sep)
  719     newcol++;
  720   } else{
  721     while ((token = strtok(ptr, ignore))!= NULL) {
  722       ptr = NULL;
  723       while ((i_lab<n_lab) && (newcol==labelcol[i_lab])) {
  724     i_lab++;
  725     token = strtok(ptr, ignore);
  726       }
  727       if (token==NULL) {
  728     break;
  729       }
  730       if ((strcmp(token, NODATA) == 0) || (sscanf(token, "%lf", &test) == 1)) {
  731     newcol++;
  732       }
  733       else {
  734     if(silent){
  735       out_err(FAT, ERR_FILE, ERR_LINE,
  736           _("Illegal format of value '%s' in line %i!\n"
  737         "Couldn't read file %s!"), token, lread, sourcename);
  738     } else{
  739       out_err(ERR, ERR_FILE, ERR_LINE,
  740           _("Illegal format of value '%s' in line %i!\n"
  741         "Couldn't read file %s!"), token, lread, sourcename);
  742       finish_readingsource(source, aline, TRUE);
  743       return;
  744     }
  745       }
  746     }
  747   }
  748 
  749   /* Check whether n_columns == n_labels     */
  750   if ((newlabs != 0) && (newlabs != newcol)) {
  751     if(silent){
  752       out_err(FAT, ERR_FILE, ERR_LINE,
  753       _("Number of columns (%d) does not equal number of labels (%d)!"),
  754       newcol, newlabs);
  755     } else{
  756       out_err(ERR, ERR_FILE, ERR_LINE,
  757       _("Number of columns (%d) does not equal number of labels (%d)!"),
  758       newcol, newlabs);
  759       finish_readingsource(source, aline, TRUE);
  760       return;
  761     }
  762   }
  763 
  764   create_columns(newcol);
  765 
  766   /* Finally, read data */
  767   out_d(_("Reading %i columns ...\n"), newcol);
  768   BOOLEAN endofline;
  769   if(sep)
  770     token = (char*)mymalloc(256);
  771   do {
  772     if ((!emptyline(aline)) && (aline[0]!=COMMENT)) {
  773       colread = 0;
  774       i_lab = 0;
  775       clean_the_line(aline);
  776       ptr = aline;
  777       j = 0;
  778       endofline = FALSE;
  779 
  780       while (!endofline) {
  781     if(sep){
  782       i = 0;
  783       while(!(*ptr == sep || *ptr == '\n')){
  784         token[i] = *ptr;
  785         i++;
  786         ptr++;
  787       }
  788       token[i] = 0;
  789       if(i == 0)
  790         strcpy(token, NODATA);
  791       if(*ptr == '\n'){
  792         endofline = TRUE;
  793       }
  794       ptr++;
  795       actcol = j + (ncol-newcol);
  796     } else{
  797       if((token = strtok(ptr, ignore)) == NULL)
  798         break;
  799       ptr = NULL;
  800       actcol = j + (ncol-newcol);
  801       while ((i_lab<n_lab) && (j==labelcol[i_lab])) {
  802         i_lab++;
  803         token = strtok(ptr, ignore);
  804       }
  805       if (token==NULL) {
  806         break;
  807       }
  808     }
  809     if (j>=newcol) {
  810       if(silent){
  811         out_err(FAT, ERR_FILE, ERR_LINE,
  812         _("Too many columns in row %i. (%d columns)"), lread, j+1);
  813       } else{
  814         out_err(ERR, ERR_FILE, ERR_LINE,
  815         _("Too many columns in row %i. (%d columns)"), lread, j+1);
  816         delete_last_columns(newcol);
  817         finish_readingsource(source, aline, TRUE);
  818         return;
  819       }
  820     }
  821 
  822     if (strcmp(token, NODATA) == 0 || strcmp(token, "nan") == 0){
  823       FWRITE(&SYSMIS, sizeof(REAL), 1, tmpptr[actcol]);
  824       nn[actcol] ++;
  825       colread ++;
  826     }
  827     else if (sscanf(token, "%lf", &test)==1) {
  828       FWRITE(&test, sizeof(REAL), 1, tmpptr[actcol]);
  829       nn[actcol] ++;
  830       colread ++;
  831     }
  832     else {
  833       if(silent){
  834         out_err(FAT, ERR_FILE, ERR_LINE,
  835         _("Illegal format of value '%s' in line %i!"), token, lread);
  836       } else{
  837         out_err(ERR, ERR_FILE, ERR_LINE,
  838         _("Illegal format of value '%s' in line %i!"), token, lread);
  839         delete_last_columns(newcol);
  840         finish_readingsource(source, aline, TRUE);
  841         return;
  842       }
  843     }
  844     j++;
  845       }
  846     }
  847 
  848     if (colread != newcol) {
  849       if(silent){
  850     out_err(FAT, ERR_FILE, ERR_LINE,
  851         _("Row %i contains just %i instead of %i columns!"),
  852         (lread), colread, newcol);
  853       } else{
  854     out_err(ERR, ERR_FILE, ERR_LINE,
  855         _("Row %i contains just %i instead of %i columns!"),
  856         (lread), colread, newcol);
  857     delete_last_columns(newcol);
  858     finish_readingsource(source, aline, TRUE);
  859     return;
  860       }
  861     }
  862     rlen = get_line(&aline, &blen, source);
  863     lread ++;
  864   } while (rlen != -1);
  865 
  866   out_d(_("\nRead data sets: \n") );
  867   for (j=0; j<newcol; j++) {
  868     actcol = j + (ncol-newcol);
  869     out_d(_("Column %s: %i\n"), alias[actcol], nn[actcol]);
  870   }
  871 
  872   finish_readingsource(source, aline, FALSE);
  873 }
  874 
  875 /* ==================================================================== */
  876 
  877 
  878 void newsourcefile() {
  879    char answer[3], newsourcename[80];
  880    FILE *source;
  881 
  882    ls();
  883    out_i(_("Name of data file: ") );
  884    GETRLINE;
  885    sscanf(line, "%s", newsourcename);
  886    out_d("\n\n");
  887 
  888    while ((source = fopen(newsourcename,"rt")) == NULL)
  889    {
  890       out_i(_("File \"%s\" not found!\n"), newsourcename);
  891       out_i(_("Please enter new file name: ") );
  892       GETRLINE;
  893       sscanf(line, "%s", newsourcename);
  894       out_d("\n");
  895    }
  896    FCLOSE(source);
  897    show_file_head(newsourcename);
  898    if(ncol > 0){
  899      out_i(_("Shall the old data be removed? (%s) "), _("y/N") );
  900      GETNLINE;
  901      if(!(empty)){
  902        sscanf(line, "%s", answer);
  903        if (answer[0] == _("y")[0]  || answer[0] == _("Y")[0]) {
  904      erasetempfiles();
  905        }
  906      }
  907    }
  908    if(sourcename)
  909      myfree(sourcename);
  910    sourcename = (char*) mycalloc(strlen(newsourcename) + 1, sizeof(char));
  911    strcpy(sourcename, newsourcename);
  912    readsourcefile();
  913    if (log_set) {
  914      fprintf(logfile, "-----------------------------------------------------\n");
  915      fprintf(logfile,
  916         _("\nNew source file: %s\n\n") , sourcename);
  917    }
  918    attach_labels_to_columns();
  919  }
  920 
  921 
  922 /* =================================================================== */
  923 
  924 int getcols(int min, int max, BOOLEAN eraserow){
  925   char salias[80];
  926   int i, j, w, nc, nr;
  927   BOOLEAN inputok, found;
  928   salias[79] = 0;
  929   if(ncol == 0)
  930     return 0;
  931   if(ncol < min){
  932     if(ncol == 1)
  933       strncpy(salias, _("but this data file has just 1 column!"), 79);
  934     else
  935       snprintf(salias, 80, _("but this data file has only %i columns!"), ncol);
  936     out_err(ERR, ERR_FILE, ERR_LINE,
  937     _("This analysis requires at least %i columns,\n  %s"), min, salias);
  938     return 0;
  939   }
  940 
  941   out_d("\n");
  942   out_d(_("Columns: ") );
  943   if(format_columns_out){
  944     set_winsize();
  945     out_d("\n");
  946     w = 0;
  947     for(j = 0; j < ncol; j++)
  948       if(strlen(alias[j]) > w)
  949     w = strlen(alias[j]);
  950     w += 2;
  951     nc = SCRCOLS / w;
  952     nr = 1 + (ncol / nc);
  953     if(ncol > nc && (ncol % nc) != 0)
  954       nr++;
  955     snprintf(salias, 80, "%%-%is", w);
  956     for(i = 0; i < nr; i++){
  957       for(j = 0; j < nc; j++){
  958     w = nc * i + j;
  959     if(w < ncol)
  960       out_d(salias, alias[w]);
  961       }
  962       out_d("\n");
  963     }
  964   } else{
  965     for (j = 0; j < ncol; j++) {
  966       out_d("%s ", alias[j]);
  967     }
  968   }
  969   out_d("\n");
  970 
  971   i = 0;
  972   do{
  973     inputok = FALSE;
  974     found  = FALSE;
  975     while (!inputok) {
  976       if(max > 1)
  977     out_i(_("Column for variable %i: "), (i+1));
  978       else
  979     out_i(_("Column name: "));
  980       GETBLINE;
  981       sscanf(line, "%s", salias);
  982 
  983       if (strcmp(line, _(_ALL_)) == 0) {
  984     if(max < ncol){
  985       out_err(ERR, ERR_FILE, ERR_LINE,
  986           _("Please, choose at most %i columns!"), max);
  987       break;
  988     }
  989     for (j = 0; j < ncol; j++) {
  990       acol[j] = j;
  991     }
  992     alloc_cols(ncol, eraserow);
  993     return ncol;
  994       }
  995 
  996       /* check if column name is matched exactly */
  997       for (j=0; j<ncol; j++) {
  998     if (strcmp(alias[j], salias)==0) {
  999       acol[i] = j;
 1000       inputok = TRUE;
 1001       i++;
 1002       break;
 1003     }
 1004       }
 1005       if (inputok) {        
 1006     break;      /* exact column alias entered -> go on  */
 1007       }
 1008 
 1009       /* try to complete entered column alias   */
 1010       for (j=0; j<ncol; j++) {
 1011     if (str_in_str(alias[j], salias)) {
 1012       if (found) {
 1013         out_err(ERR, ERR_FILE, ERR_LINE,
 1014         _("Column name '%s' is not unique!"), salias);
 1015         inputok = FALSE;
 1016         i--;
 1017         break;
 1018       }
 1019       else {
 1020         found = TRUE;
 1021         inputok = TRUE;
 1022         acol[i] = j;
 1023         i++;
 1024       }
 1025     }
 1026       }
 1027       if ((!inputok) && (!found)) {
 1028     out_err(ERR, ERR_FILE, ERR_LINE,
 1029         _("Column %s does not exist!"), salias);
 1030       }
 1031       else if ((!inputok) && (found)) {
 1032     found = FALSE;
 1033       }
 1034     }
 1035   } while(!empty && i < max);
 1036 
 1037   if(i < min){
 1038     if(i > 0)
 1039       out_err(ERR, ERR_FILE, ERR_LINE,
 1040       _("At least %i columns have to be selected!"), min);
 1041     return 0;
 1042   }
 1043   if(eraserow == 1 && !(equal_rows(i))){
 1044     out_err(ERR, ERR_FILE, ERR_LINE, _("The columns must have "
 1045       "the same number of data points for this analysis!"));
 1046     return 0;
 1047   }
 1048 
 1049   out_d("\n");
 1050   if (log_set) {
 1051     fprintf(logfile, "-----------------------------------------------------------\n\n");
 1052   }
 1053 
 1054   alloc_cols(i, eraserow);
 1055   return(i);
 1056 }
 1057 
 1058 void printcols() {
 1059   int i, j, k, n, r, p, q, w;
 1060   char b[50], b2[50], *header;
 1061   BOOLEAN labelfound = FALSE;
 1062   b[49] = 0;
 1063 
 1064   /* Choosing columns */
 1065   set_winsize();
 1066   k = (SCRCOLS / 11) - 1;
 1067   if(ncol < k){ /* show all columns if the screen is width enough */
 1068     for (j = 0; j < ncol; j++) {
 1069       acol[j] = j;
 1070     }
 1071     alloc_cols(ncol, 3);
 1072     n = ncol;
 1073   } else{
 1074     n = getcols(1, ncol, 3);
 1075   }
 1076   if(n == 0)
 1077     return;
 1078 
 1079   /* Determining number of rows */
 1080   k = nn[acol[0]];
 1081   j = strlen(alias[acol[0]]) + 10;
 1082   for(i = 1; i < n; i++){
 1083     j += strlen(alias[acol[i]]);
 1084     if(nn[acol[i]] > k)
 1085       k = nn[acol[i]];
 1086   }
 1087 
 1088   /* Creating header */
 1089   if(j < ((n + 1) * 16))
 1090     j = (n + 1) * 16;
 1091   header = (char*)m_calloc(j, sizeof(char));
 1092   strcpy(header, "       ");
 1093   for(i = 0; i < n; i++){
 1094     if(names[acol[i]] && names[acol[i]]->n > 0)
 1095       snprintf(b, 50, "%-10s ", alias[acol[i]]);
 1096     else
 1097       snprintf(b, 50, "%10s ", alias[acol[i]]);
 1098     strcat(header, b);
 1099   }
 1100   strcat(header, "\n");
 1101  
 1102   /* Printing data */
 1103   out_r(_("Data from columns:\n"));
 1104   colorize(ClHeader);
 1105   out_r(header);
 1106   colorize(ClDefault);
 1107   j = 0;
 1108   p = 2; /* already printed lines = column_names + wait_message */
 1109   int sz;
 1110   while(j < k){
 1111     colorize(ClLineNum);
 1112     out_r("%5i: ", (j + 1));
 1113     colorize(ClDefault);
 1114     for(i = 0; i < n; i++){
 1115       if(j < nn[acol[i]]){
 1116     if(xx[acol[i]][j] == SYSMIS)
 1117       out_r("%10c ", '.');
 1118     else{
 1119       if(names[acol[i]])
 1120         for(q = 0; q < names[acol[i]]->n; q++)
 1121           if(names[acol[i]]->v[q] == xx[acol[i]][j]){
 1122         labelfound = TRUE;
 1123         strncpy(b, names[acol[i]]->l[q], 10);
 1124         b[10] = 0;
 1125         sz = 10;
 1126         if(is_utf8){
 1127           /* If there are non-ascii chars, we truncated the label prematurely*/
 1128           while(stringLen(b) < 10 && sz < 40){
 1129             sz++;
 1130             strncpy(b, names[acol[i]]->l[q], sz);
 1131           }
 1132 
 1133           /* Avoiding truncating multibyte char, at least in Latin 1. */
 1134           w = strlen(b);
 1135           if(b[w-1] == (char)0xC3){
 1136             sz++;
 1137             strncpy(b, names[acol[i]]->l[q], sz);
 1138           }
 1139           sz = 10 + strlen(b) - stringLen(b);
 1140         }
 1141 
 1142         sprintf(b2, "%%-%ds ", sz);
 1143         out_r(b2, b);
 1144           }
 1145       if(labelfound)
 1146         labelfound = FALSE;
 1147       else
 1148         out_r("%10g ", xx[acol[i]][j]);
 1149     }
 1150       } else{
 1151     out_r("%10s ", " ");
 1152       }
 1153     }
 1154     out_r("\n");
 1155     p++;
 1156     if (p == (SCRLINES - 1) && !(silent)){
 1157       p = 2;
 1158       out_i(_("---> Please, choose: <RETURN> to continue,\n"
 1159         "     <Any letter> to stop, or a row number: ") );
 1160       GETNLINE
 1161     if(!empty){
 1162       if((line[0] >= 'a' && line[0] <= 'z') 
 1163           || (line[0] >= 'A' && line[0] <= 'z'))
 1164         return;
 1165       r = getint();
 1166       if(r > 0)
 1167         j = r - 2;
 1168     }
 1169       colorize(ClHeader);
 1170       out_r(header);
 1171       colorize(ClDefault);
 1172     }
 1173     j++;
 1174   }
 1175 }
 1176 
 1177 void printcol(REAL x[], int n) {
 1178    int i, k;
 1179    out_r(_("Data from column \"%s\":\n"), get_label(x));
 1180    for (i=0; i<n; i++) {
 1181      k=i+1;
 1182      if (x[i] == SYSMIS)
 1183        out_r("%5i.)  %s\n", k, NODATA);
 1184      else
 1185        out_r("%5i.)  %g\n", k, x[i]);
 1186      if ((i+1) % (SCRLINES - 1) == 0) {
 1187     mywait();
 1188     if (!empty) {
 1189       return;
 1190     }
 1191      }
 1192    }
 1193    out_r("-------------------------------------------\n\n");
 1194 }
 1195 
 1196 /* =================================================================== */
 1197 
 1198 
 1199 PREAL readcol(int i) {
 1200   PREAL px;
 1201 
 1202   if (nn[i] == 0) {
 1203     out_err(FAT, ERR_FILE, ERR_LINE,
 1204         _("Column %i does not exist!"), i+1);
 1205   }
 1206   px = (REAL*)mycalloc(nn[i], sizeof(REAL));
 1207   rewind(tmpptr[i]);
 1208   FREAD(px, sizeof(REAL), nn[i], tmpptr[i]);
 1209   x_read[i] = TRUE;
 1210   return px;
 1211 }
 1212 
 1213 /* ==================================================================== */
 1214 
 1215 
 1216 void alloc_cols(int n_alloc, BOOLEAN eraserow) {
 1217   int k;
 1218   int cr = 0; /* current row */
 1219   int tr = 0; /* total number of rows already checked */
 1220   BOOLEAN RowHasMis = FALSE;
 1221 
 1222   /* delete all columns from memory */
 1223   for (k=0; k<MCOL; k++){
 1224     if((x_read[k])){
 1225       free_column(k);
 1226     }
 1227   }
 1228 
 1229   /* put selected columns in memory */
 1230   for (k=0; k<n_alloc; k++)
 1231     if (!x_read[acol[k]]){
 1232       xx[acol[k]] = readcol(acol[k]);
 1233     }
 1234 
 1235   /* Delete rows with missing values or simply delete missing values */
 1236   /* if eraserow == 3, do nothing                                    */
 1237   if (eraserow == TRUE){
 1238     while(tr < nn[acol[0]]){
 1239       for (k=0; k<n_alloc; k++)
 1240         if(xx[acol[k]][tr] == SYSMIS) RowHasMis = TRUE;
 1241       if (RowHasMis){
 1242         tr++;
 1243         RowHasMis = FALSE;
 1244       }
 1245       else{
 1246         for (k=0; k<n_alloc; k++)
 1247           xx[acol[k]][cr] = xx[acol[k]][tr];
 1248         cr++;
 1249         tr++;
 1250       }
 1251     }
 1252     for (k=0; k<n_alloc; k++)
 1253       vn[acol[k]] = cr;
 1254     out_r( _("%d rows with missing values were deleted for this analysis\n\n"),
 1255           (nn[acol[0]] - cr));
 1256   }
 1257   else if (eraserow == FALSE) {
 1258     for (k=0; k<n_alloc; k++){
 1259       tr = 0;
 1260       cr = 0;
 1261       while (tr < nn[acol[k]]){
 1262         if(xx[acol[k]][tr] == SYSMIS)
 1263           tr++;
 1264         else {
 1265           xx[acol[k]][cr] = xx[acol[k]][tr];
 1266           cr++;
 1267           tr++;
 1268         }
 1269       }
 1270       vn[acol[k]] = cr;
 1271       out_r( _("Column %s: %d data points\n"),
 1272           alias[acol[k]], cr);
 1273     }
 1274   }
 1275 
 1276   if (log_set)
 1277     for (k=0; k<n_alloc; k++)
 1278       fprintf(logfile, _("Variable %i = Column %s\n"), (k+1), alias[acol[k]] );
 1279 
 1280   /* rewinding of pointers to tmpfiles */
 1281   for (k=0; k<n_alloc; k++)
 1282     rewind(tmpptr[acol[k]]);
 1283 }
 1284 
 1285 
 1286 BOOLEAN make_new_col(char *analias, int n) {
 1287   int i;
 1288 
 1289   for (i=0; i<ncol; i++) {
 1290     if (strcmp(analias, alias[i])==0) {
 1291       out_err(ERR, ERR_FILE, ERR_LINE,
 1292         _("Column %s exists already!"), analias);
 1293       return FALSE;
 1294     }
 1295   }
 1296   create_columns(1);
 1297   if(alias[ncol - 1])
 1298     myfree(alias[ncol - 1]);
 1299   alias[ncol - 1] = (char*)mymalloc((strlen(analias)+1));
 1300   strcpy(alias[ncol - 1], analias);
 1301   out_r(_("New column %s created!\n"), alias[ncol - 1]);
 1302   nn[ncol - 1] = n;
 1303   return TRUE;
 1304 }
 1305 
 1306  /* =================================================================== */
 1307 
 1308 int col_exist(char *analias, BOOLEAN is_error) {
 1309   int i;
 1310 
 1311   for ( i=0; i<ncol; i++ ) {
 1312     if ( alias[i] && strcmp(analias, alias[i])==0)  {
 1313       if(is_error)
 1314     out_err(ERR, ERR_FILE, ERR_LINE,
 1315         _("Column %s exists already!"), analias);
 1316       return i;
 1317     }
 1318   }
 1319   return -1;
 1320 }
 1321 
 1322 /* =================================================================== */
 1323 
 1324 /* =================================================================== */
 1325 
 1326 #ifndef STATIST_X
 1327 char *get_label(PREAL x) {
 1328   int i;
 1329 
 1330   for (i=0; i<ncol; i++) {
 1331     if (x == xx[i]) {
 1332       if(names[i] && names[i]->ctitle)
 1333     return names[i]->ctitle;
 1334       else
 1335     return alias[i];
 1336     }
 1337   }
 1338   out_err(ERR, ERR_FILE, ERR_LINE,
 1339     _("No label found for column!") );
 1340   return NULL;
 1341 }
 1342 #endif
 1343 
 1344 char *get_name(PREAL x) {
 1345   int i;
 1346 
 1347   for (i=0; i<ncol; i++) {
 1348     if (x == xx[i] && alias[i]){
 1349     return alias[i];
 1350     }
 1351   }
 1352   out_err(ERR, ERR_FILE, ERR_LINE,
 1353     _("No name found for column!") );
 1354   return NULL;
 1355 }
 1356 
 1357 void log_transform() {
 1358   char analias[80];
 1359   PREAL y;
 1360   int i, n = 0;
 1361 
 1362   out_i(_("Please select column for log-transformation\n") );
 1363   i = getcols(1, 1, 3);
 1364   if(i == 0)
 1365     return;
 1366   strncpy(analias, "log_", 79);
 1367   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1368   if(col_exist(analias, TRUE) != -1)
 1369     return;
 1370   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1371   for (i=0; i<nn[acol[0]]; i++) {
 1372     if (xx[acol[0]][i] > 0.0)
 1373       y[i] = log10(xx[acol[0]][i]);
 1374     else{
 1375       y[i] = SYSMIS;
 1376       if(xx[acol[0]][i] != SYSMIS)
 1377     n++;
 1378     }
 1379   }
 1380 
 1381   if(n == 1)
 1382     out_err(MWA, ERR_FILE, ERR_LINE, _("One value was less or equal to zero"
 1383       " and was transformed into missing value!"));
 1384   if(n > 1)
 1385     out_err(MWA, ERR_FILE, ERR_LINE, _("%i values were less or equal to zero"
 1386       " and were transformed into missing values!"), n);
 1387 
 1388   if (!(make_new_col(analias, nn[acol[0]]))) {
 1389     return;
 1390   }
 1391   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1392 }
 1393 
 1394 
 1395 void ln_transform() {
 1396   char analias[80];
 1397   PREAL y;
 1398   int i, n = 0;
 1399 
 1400   out_i(_("Please select column for log-transformation\n") );
 1401   i = getcols(1, 1, 3);
 1402   if(i == 0)
 1403     return;
 1404   strncpy(analias, "ln_", 79);
 1405   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1406   if(col_exist(analias, TRUE) != -1)
 1407     return;
 1408   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1409   for (i=0; i<nn[acol[0]]; i++) {
 1410     if (xx[acol[0]][i] > 0.0)
 1411       y[i] = log(xx[acol[0]][i]);
 1412     else{
 1413       y[i] = SYSMIS;
 1414       if(xx[acol[0]][i] != SYSMIS)
 1415     n++;
 1416     }
 1417   }
 1418 
 1419   if(n == 1)
 1420     out_err(MWA, ERR_FILE, ERR_LINE, _("One value was less or equal to zero"
 1421       " and was transformed into missing value!"));
 1422   if(n > 1)
 1423     out_err(MWA, ERR_FILE, ERR_LINE, _("%i values were less or equal to zero"
 1424       " and were transformed into missing values!"), n);
 1425 
 1426   if (!(make_new_col(analias, nn[acol[0]]))) {
 1427     return;
 1428   }
 1429   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1430 }
 1431 /* =================================================================== */
 1432 
 1433 
 1434 void power_10_transform() {
 1435   char analias[80];
 1436   PREAL y;
 1437   int i;
 1438 
 1439   out_i(_("Please select column for exponentiation\n") );
 1440   i = getcols(1, 1, 3);
 1441   if(i == 0)
 1442     return;
 1443   strncpy(analias, "10^_", 79);
 1444   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1445   if(col_exist(analias, TRUE) != -1)
 1446     return;
 1447   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1448   for (i=0; i<nn[acol[0]]; i++) {
 1449     if(xx[acol[0]][i] == SYSMIS)
 1450       y[i] = SYSMIS;
 1451     else
 1452       y[i] = pow(10.0, xx[acol[0]][i]);
 1453   }
 1454 
 1455   if (!(make_new_col(analias, nn[acol[0]]))){
 1456     return;
 1457   }
 1458   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1459 }
 1460 
 1461 void power_e_transform() {
 1462   char analias[80];
 1463   PREAL y;
 1464   int i;
 1465 
 1466   out_i(_("Please select column for exponentiation\n") );
 1467   i = getcols(1, 1, 3);
 1468   if(i == 0)
 1469     return;
 1470   strncpy(analias, "e^_", 79);
 1471   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1472   if(col_exist(analias, TRUE) != -1)
 1473     return;
 1474   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1475   for (i=0; i<nn[acol[0]]; i++) {
 1476     if(xx[acol[0]][i] == SYSMIS)
 1477       y[i] = SYSMIS;
 1478     else
 1479       y[i] = exp(xx[acol[0]][i]);
 1480   }
 1481 
 1482   if (!(make_new_col(analias, nn[acol[0]]))){
 1483     return;
 1484   }
 1485   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1486 }
 1487 
 1488 
 1489 /* =================================================================== */
 1490 
 1491 
 1492 void inv_transform() {
 1493   char analias[80];
 1494   PREAL y;
 1495   int i;
 1496 
 1497   out_i(_("Please select column for inversion\n") );
 1498   i = getcols(1, 1, 3);
 1499   if(i == 0)
 1500     return;
 1501   strncpy(analias, "inv_", 79);
 1502   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1503   if(col_exist(analias, TRUE) != -1)
 1504     return;
 1505   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1506   for (i=0; i<nn[acol[0]]; i++) {
 1507     if(xx[acol[0]][i] == SYSMIS)
 1508       y[i] = SYSMIS;
 1509     else
 1510       y[i] = 1./xx[acol[0]][i];
 1511   }
 1512 
 1513   if (!(make_new_col(analias, nn[acol[0]]))){
 1514     return;
 1515   }
 1516   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1517 }
 1518 
 1519 
 1520 /* =================================================================== */
 1521 
 1522 
 1523 void z_transform() {
 1524   char analias[80];
 1525   PREAL y;
 1526   REAL mean, sdv;
 1527   int i;
 1528 
 1529   out_i(_("Please select column for z-transformation\n") );
 1530   i = getcols(1, 1, TRUE);
 1531   if(i == 0)
 1532     return;
 1533   strncpy(analias, "z_", 79);
 1534   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1535   if(col_exist(analias, TRUE) != -1)
 1536     return;
 1537   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1538   sdv = get_sdv(xx[acol[0]], nn[acol[0]]);
 1539   mean = get_mean(xx[acol[0]], nn[acol[0]]);
 1540   if(nn[acol[0]] != vn[acol[0]])
 1541     alloc_cols(1, 3);
 1542   for (i=0; i<nn[acol[0]]; i++) {
 1543     if(xx[acol[0]][i] == SYSMIS)
 1544       y[i] = SYSMIS;
 1545     else
 1546       y[i] = (xx[acol[0]][i]-mean)/sdv;
 1547   }
 1548 
 1549   if (!(make_new_col(analias, nn[acol[0]]))){
 1550     return;
 1551   }
 1552   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1553 }
 1554 
 1555 
 1556 /* =================================================================== */
 1557 
 1558 void sort_col() {
 1559   char analias[80];
 1560   PREAL y;
 1561   int i;
 1562 
 1563   out_i(_("Please select column to be sorted\n") );
 1564   i = getcols(1, 1, 3);
 1565   if(i == 0)
 1566     return;
 1567   strncpy(analias, "sort_", 79);
 1568   strncat(analias, alias[acol[0]], 79-strlen(analias));
 1569   if(col_exist(analias, TRUE) != -1)
 1570     return;
 1571   y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
 1572   for (i=0; i<nn[acol[0]]; i++) {
 1573     y[i] = xx[acol[0]][i];
 1574   }
 1575   qsort(y, nn[acol[0]], sizeof(REAL), real_compar_up);
 1576   if (!(make_new_col(analias, nn[acol[0]]))){
 1577     return;
 1578   }
 1579   FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
 1580 }
 1581 
 1582 
 1583 /* =================================================================== */
 1584 
 1585 
 1586 void readcol_from_term() {
 1587   char aline[80], answer[10];
 1588   int n=0;
 1589   REAL temp;
 1590   BOOLEAN ok, stop=FALSE;
 1591 
 1592   if (ncol > 0) {
 1593     out_i(_("Shall all data be deleted? (%s) "), _("y/N") );
 1594     GETNLINE;
 1595     if(!(empty)){
 1596       sscanf(line, "%s", answer);
 1597       if (answer[0] == _("y")[0] || answer[0] == _("Y")[0]) {
 1598     erasetempfiles();
 1599       }
 1600     }
 1601   }
 1602 
 1603   out_i(_("Column %i is being read, stop input with '.'\n"), (ncol+1));
 1604   aline[0] = '1';
 1605   create_columns(1);
 1606 
 1607   while (!stop) {
 1608     ok = FALSE;
 1609     while (!ok) {
 1610       out_d(_("Value %i: "), (n+1));
 1611       fgets(aline, 79, stdin);
 1612       if ( (aline[0]=='.') && (strlen(aline)==2) ) {
 1613     stop = TRUE;
 1614       }
 1615       if ( (sscanf(aline, "%lf", &temp)==1) || (stop) ) {
 1616     ok = TRUE;
 1617       }
 1618       else {
 1619     out_err(ERR, ERR_FILE, ERR_LINE,
 1620       _("Illegal input, please repeat: ") );
 1621       }
 1622 
 1623       if ( (ok) && (!stop) ) {
 1624     n++;
 1625     FWRITE(&temp, sizeof(REAL), 1, tmpptr[ncol - 1]);
 1626       }
 1627     }
 1628   }
 1629   if (n>0) {
 1630     nn[ncol - 1] = n;
 1631   } else{
 1632     delete_column(ncol - 1);
 1633   }
 1634 }
 1635 
 1636 
 1637 
 1638 BOOLEAN str_in_str(const char *s1, const char *s2) {
 1639   int i, n = strlen(s2);
 1640 
 1641   for (i=0; i<n; i++) {
 1642     if (s1[i] != s2[i]) {
 1643       return FALSE;
 1644     }
 1645   }
 1646   return TRUE;
 1647 }
 1648 
 1649 
 1650 BOOLEAN emptyline(const char *s) {
 1651   int i, n = strlen(s);
 1652 
 1653   for (i=0; i<n; i++) {
 1654     if (!isspace((int)s[i])) {
 1655       return FALSE;
 1656     }
 1657   }
 1658   return TRUE;
 1659 }
 1660 
 1661 
 1662 BOOLEAN formatToken(char *token, char *result){
 1663   int i = 0;
 1664   REAL test;
 1665 
 1666   /* Remove leading blanks */
 1667   while(*token == ' ' && *token != 0)
 1668     token++;
 1669   if(*token == 0 || *token == '\r' || *token == '\n'){
 1670     sprintf(result, "%s", NODATA);
 1671     return FALSE;
 1672   }
 1673 
 1674   /* Get the token, including blanks */
 1675   while(*token != 0){
 1676     result[i] = *token;
 1677     token++;
 1678     i++;
 1679   }
 1680 
 1681   /* Remove trailing blanks */
 1682   i--;
 1683   while(result[i] == ' '){
 1684     result[i] = 0;
 1685     i--;
 1686   }
 1687   i++;
 1688 
 1689   result[i] = 0;
 1690   if(sscanf(result, "%lf", &test) == 1){
 1691     if(test == floor(test) && test >= -9999999999999999.0 && test <= 9999999999999999.0)
 1692       snprintf(result, 32, "%-16.16g", test);
 1693     else
 1694       snprintf(result, 32, "%-16.10g", test);
 1695     token = result;
 1696     while(*token != 0){
 1697       token++;
 1698       if(*token == ' '){
 1699     *token = 0;
 1700     break;
 1701       }
 1702     }
 1703     return FALSE;
 1704   } else{
 1705     strcpy(token, result);
 1706     sprintf(result, "\"%s\"", token);
 1707     return TRUE;
 1708   }
 1709 }
 1710 
 1711 void xcols_usage(char * name){
 1712   out_d(_("\nThe option --xcols tells Statist to extract columns from a fixed "
 1713       "width data file.\n\n"));
 1714   out_d(_("\nUsage:\n"
 1715       "%s --xcols config_file data_base dest_file\n\n"), name);
 1716   exit(0);
 1717 }
 1718 
 1719 void extract_cols(int argc, char *argv[]){
 1720   FILE *f1, *f2;
 1721   char b[1000];
 1722   int i, j, k, n = 0, nrows = 0, rlen, max = 100, pos = 0, *begin, *end;
 1723   size_t blen = 64;
 1724   char *b1, **lbel, *token, *ftokn;
 1725   unsigned int l;
 1726   BOOLEAN *alpha;
 1727 
 1728 #ifndef NO_GETTEXT
 1729   SET_C_LOCALE;
 1730 #endif
 1731 
 1732   if(sep == 0)
 1733     sep = ' ';
 1734 
 1735   for(i = 1; i < argc; i++)
 1736     if(strcmp(argv[i], "--xcols") == 0){
 1737       pos = i;
 1738       break;
 1739     }
 1740 
 1741   if((argc - pos) < 4)
 1742     xcols_usage(argv[0]);
 1743 
 1744   lbel = (char**) mymalloc(max * sizeof(char*));
 1745   begin = (int*) mymalloc(max * sizeof(int));
 1746   end = (int*) mymalloc(max * sizeof(int));
 1747   alpha = (BOOLEAN*)mymalloc(max * sizeof(BOOLEAN));
 1748   b1 = (char*) mymalloc(blen * sizeof(char));
 1749 
 1750   /* read config_file */
 1751   FOPEN(argv[pos + 1], "r", f1);
 1752   rlen = get_line(&b1, &blen, f1);
 1753   while (rlen != -1){
 1754     if(b1[0] == '#' || strlen(b1) < 3){
 1755       rlen = get_line(&b1, &blen, f1);
 1756       continue;
 1757     }
 1758     i = 0;
 1759     while(i < 997 && b1[i] != ' ' && b1[i] != '\t'){
 1760       b[i] = b1[i];
 1761       i++;
 1762     }
 1763     b[i] = 0;
 1764     lbel[n] = (char*) mymalloc(i + 1);
 1765     strcpy(lbel[n], b);
 1766     while(!(b1[i] >= '0' && b1[i] <= '9'))
 1767       i++;
 1768     j = 0;
 1769     while(b1[i] >= '0' && b1[i] <= '9'){
 1770       b[j] = b1[i];
 1771       i++; j++;
 1772     }
 1773     b[j] = 0;
 1774     begin[n] = atoi(b) - 1;
 1775     while(!((b1[i] >= '0' && b1[i] <= '9') || b1[i] == '\n'))
 1776       i++;
 1777     j = 0;
 1778     while(b1[i] >= '0' && b1[i] <= '9'){
 1779       b[j] = b1[i];
 1780       i++; j++;
 1781     }
 1782     b[j] = 0;
 1783     if(b[0])
 1784       end[n] = atoi(b) - 1;
 1785     else
 1786       end[n] = begin[n];
 1787     n++;
 1788     if(n == max){
 1789       max += 100;
 1790       lbel = (char**) myrealloc(lbel, (max * sizeof(char*)));
 1791       begin = (int*) myrealloc(begin, (max * sizeof(int)));
 1792       end = (int*) myrealloc(end, (max * sizeof(int)));
 1793       alpha = (BOOLEAN*) myrealloc(alpha, (max * sizeof(BOOLEAN)));
 1794     }
 1795     rlen = get_line(&b1, &blen, f1);
 1796   }
 1797   FCLOSE(f1);
 1798 
 1799   j= 0;
 1800   l = 3;
 1801   for(i = 0; i < n; i++){
 1802     alpha[i] = FALSE;
 1803     j += strlen(lbel[i]) + 30;
 1804     if(j > l)
 1805       l = j;
 1806     if((end[i] - begin[i] + 1) > l)
 1807       l = end[i] - begin[i] + 1;
 1808   }
 1809   if(l < 128)
 1810     l = 128;
 1811   else
 1812     l *= 3;
 1813   token = (char*)mymalloc(l * sizeof(char));
 1814   ftokn = (char*)mymalloc(l * sizeof(char));
 1815 
 1816   /* read from origin, and write to destination */
 1817   FOPEN(argv[pos + 2], "r", f1);
 1818   FOPEN(argv[pos + 3], "w", f2);
 1819   out_d(_("Extracting columns from \"%s\" to \"%s\"...\n"),
 1820       argv[pos + 2], argv[pos + 3]);
 1821   /* Don't put the "#%" string in the first line if the user doesn't seem
 1822    * to use it. */
 1823   if(!(has_header || detect_header))
 1824     fprintf(f2, "#%%");
 1825   for(i = 0; i < (n - 1); i++)
 1826     fprintf(f2, "%s%c", lbel[i], sep);
 1827   fprintf(f2, "%s\n", lbel[n-1]);
 1828   rlen = get_line(&b1, &blen, f1);
 1829   while (rlen != -1){
 1830     for(i = 0; i < n; i++){
 1831       k = 0;
 1832       for(j = begin[i]; j <= end[i]; j++){
 1833     token[k] = b1[j];
 1834     k++;
 1835       }
 1836       token[k] = 0;
 1837       if(formatToken(token, ftokn))
 1838     alpha[i] = TRUE;
 1839       if(i < (n - 1))
 1840     fprintf(f2, "%s%c", ftokn, sep);
 1841       else
 1842     fprintf(f2, "%s\n", ftokn);
 1843     }
 1844     rlen = get_line(&b1, &blen, f1);
 1845     nrows++;
 1846   }
 1847   FCLOSE(f1);
 1848   FCLOSE(f2);
 1849   myfree(b1);
 1850   myfree(begin);
 1851   myfree(end);
 1852   out_d(_("Done: %d columns, %d rows.\n"), n, nrows);
 1853 #ifndef NO_GETTEXT
 1854   RESET_LOCALE;
 1855 #endif
 1856   j = 0;
 1857   for(i = 0; i < n; i++)
 1858     if(alpha[i])
 1859       j = 1;
 1860   if(j){
 1861     out_err(WAR, ERR_FILE, ERR_LINE,
 1862     _("Non-numeric values were found."));
 1863     out_r(_("List of columns with non-numeric values:\n"));
 1864     for(i = 0; i < n; i++)
 1865       if(alpha[i])
 1866     out_r(" %s", lbel[i]);
 1867     out_r("\n");
 1868   }
 1869   for(i = 0; i < n; i++)
 1870     myfree(lbel[i]);
 1871   myfree(lbel);
 1872   myfree(alpha);
 1873 }
 1874 
 1875 void xsample_usage(char * name){
 1876   out_d(_("\nThe option --xsample tells Statist to extract a random sample of\n"
 1877     "rows from a given data file.\n\n"));
 1878   out_d(_("Usage:\n\n"
 1879       "  %s --xsample percentage data_base dest_file\n\n"
 1880       "where \"percentage\" is an integer between 1 and 99.\n\n"), name);
 1881   exit(1);
 1882 }
 1883 
 1884 void extract_sample(int argc, char * argv[]){
 1885   int percent = -1;
 1886   char *s;
 1887   int i, k, n = 0, N = 0, rlen, pos = 0;
 1888   FILE * f1;
 1889   FILE * f2;
 1890   size_t blen = 64;
 1891 #ifndef NO_GETTEXT
 1892   SET_C_LOCALE;
 1893 #endif
 1894 
 1895   for(i = 1; i < argc; i++)
 1896     if(strcmp(argv[i], "--xsample") == 0){
 1897       pos = i;
 1898       break;
 1899     }
 1900 
 1901   if((argc - pos) < 4)
 1902     xsample_usage(argv[0]);
 1903   percent = atoi(argv[pos  + 1]);
 1904   if(percent > 99 || percent < 1){
 1905     out_err(ERR, ERR_FILE, ERR_LINE,
 1906     _("\"%s\" is not a valid value for percentage."), argv[pos + 1]);
 1907     xsample_usage(argv[0]);
 1908   }
 1909 
 1910   s = (char*)mymalloc(blen);
 1911 
 1912   /* read from source, and write to destine */
 1913   srand(time(NULL));
 1914   k = percent * 10;
 1915   FOPEN(argv[pos + 2], "r", f1);
 1916   FOPEN(argv[pos + 3], "w", f2);
 1917 
 1918   out_r(_("Creating a new database with a random sample of approximately\n"
 1919       "%i%% of \"%s\" rows...\n"), percent, argv[3]);
 1920 
 1921   rlen = get_line(&s, &blen, f1);
 1922   while(rlen != -1 && (s[0] == '#' || (s[0] >= 'A' && s[0] <= 'Z') ||
 1923       (s[0] >= 'a' && s[0] <= 'z') || (s[0] == '"' &&
 1924     ((s[1] >= 'A' && s[1] <= 'Z') || (s[1] >= 'a' && s[1] <= 'z'))))){
 1925     fputs(s, f2);
 1926     rlen = get_line(&s, &blen, f1);
 1927   }
 1928   while(rlen != -1){
 1929     i = rand() % 1000;
 1930     if(i < k){
 1931       fputs(s, f2);
 1932       n++;
 1933     }
 1934     rlen = get_line(&s, &blen, f1);
 1935     N++;
 1936   }
 1937   FCLOSE(f1);
 1938   FCLOSE(f2);
 1939   myfree(s);
 1940   out_r(_("Done: selected %d out of %d rows.\n"), n, N);
 1941 #ifndef NO_GETTEXT
 1942   RESET_LOCALE;
 1943 #endif
 1944 }
 1945 
 1946 /* Export current database as fixed width data file */
 1947 void exp_fwdf(){
 1948   int i, j, k, *w;
 1949   char *p, s[32], q[32], dfname[MLINE], cfname[MLINE];
 1950   FILE *df, *cf;
 1951   REAL r;
 1952   if(ncol < 2){
 1953     out_err(ERR, ERR_FILE, ERR_LINE,
 1954     _("The current data file has less than 2 columns!"));
 1955     return;
 1956   }
 1957   for(i = 1; i < ncol; i++)
 1958     if(nn[0] != nn[i]){
 1959       out_err(ERR, ERR_FILE, ERR_LINE,
 1960       _("There are columns with different number of rows!"));
 1961       return;
 1962     }
 1963 
 1964   /* Calculating the necessary width for each column */
 1965   w = (int*)m_calloc(ncol, sizeof(int));
 1966   for(i = 0; i < ncol; i++){
 1967     acol[0] = i;
 1968     alloc_cols(1, FALSE);
 1969     for(j = 0; j < vn[i]; j++){
 1970       r = xx[i][j];
 1971       if(r == floor(r) && r >= -9999999999999999.0 && r <= 9999999999999999.0)
 1972     snprintf(s, 32, "%16.16g", r);
 1973       else
 1974     snprintf(s, 32, "%16.10g", r);
 1975       p = s;
 1976       while(p[0] == ' ')
 1977     p++;
 1978       k = strlen(p);
 1979       if(k > w[i])
 1980     w[i] = k;
 1981     }
 1982   }
 1983   
 1984   out_i(_("Please enter name of the export file: ") );
 1985   GETRLINE;
 1986   sscanf(line, "%s", dfname);
 1987   out_i(_("Please enter name of the list of columns file: ") );
 1988   GETRLINE;
 1989   sscanf(line, "%s", cfname);
 1990   FOPEN(dfname, "wt", df);
 1991   FOPEN(cfname, "wt", cf);
 1992   j = 1;
 1993   k = 0;
 1994 
 1995   /* saving the list of columns */
 1996   for(i = 0; i < ncol; i++){
 1997     k += w[i];
 1998     fprintf(cf, "%s %i-%i\n", alias[i], j, k);
 1999     j += w[i];
 2000   }
 2001   FCLOSE(cf);
 2002   out_d(_("File \"%s\" saved!"), cfname);
 2003   out_d("\n");
 2004   
 2005   /* saving the fixed width datafile */
 2006 #ifndef NO_GETTEXT
 2007   SET_C_LOCALE;
 2008 #endif
 2009   k = sizeof(REAL);
 2010   for(i = 0; i < nn[0]; i++){
 2011     for(j = 0; j < ncol; j++){
 2012       FREAD(&r, k, 1, tmpptr[j]);
 2013       if(r == SYSMIS){
 2014     sprintf(s, "%%%is", w[j]);
 2015         fprintf(df, s, " ");
 2016       } else{
 2017     if(r == floor(r) && r >= -9999999999999999.0 && r <= 9999999999999999.0)
 2018       snprintf(s, 32, "%16.16g", r);
 2019     else
 2020       snprintf(s, 32, "%16.10g", r);
 2021     p = s;
 2022     while(p[0] == ' ')
 2023       p++;
 2024     snprintf(q, 32, "%%%is", w[j]);
 2025     fprintf(df, q, p);
 2026       }
 2027     }
 2028     fprintf(df, "\n");
 2029   }
 2030 #ifndef NO_GETTEXT
 2031   RESET_LOCALE;
 2032 #endif
 2033   
 2034   /* Finishing */
 2035   FCLOSE(df);
 2036   out_r(_("File \"%s\" saved!"), dfname);
 2037   out_r("\n\n");
 2038 }
 2039