"Fossies" - the Fresh Open Source Software Archive 
Member "statist-1.4.2/src/data.c" (21 Oct 2006, 48016 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "data.c" see the
Fossies "Dox" file reference documentation.
1 /* This file is part of statist
2 **
3 ** It is distributed under the GNU General Public License.
4 ** See the file COPYING for details.
5 **
6 ** (c) 1997 Dirk Melcher
7 ** old email address: Dirk.Melcher@usf.Uni-Osnabrueck.DE
8 **
9 ** adapted for statistX: Andreas Beyer, 1999, abeyer@usf.uni-osnabrueck.de
10 **
11 ** The function get_line() was adapted from GNU coreutils 5.2.1 getndelim2.c
12 ** Copyright (C) 1993, 1996, 1997, 1998, 2000, 2003 Free Software
13 ** Foundation, Inc.
14 **
15 ** published by Bernhard Reiter http://www.usf.Uni-Osnabrueck.DE/~breiter
16 ** $Id: data.c,v 1.44 2006/10/21 23:15:22 jakson Exp $
17 ***************************************************************/
18
19 /* data.c for STATIST */
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <math.h>
24 #include <stdlib.h>
25 #include <time.h>
26
27 #include "statist.h"
28 #include "data.h"
29 #include "funcs.h"
30 #include "menue.h"
31
32 #include "gettext.h"
33
34 /* ==================================================================== */
35
/* Values of the running column counter recorded by parsecomment() each time
 * it meets a '$'-prefixed token; readsourcefile() skips one input token at
 * each recorded position.  The array is resized together with the other
 * per-column arrays in inflate_MCOL(). */
36 static short int *labelcol;
/* Number of entries of labelcol that are currently in use. */
37 static short int n_lab;
38
39
40 /* ==================================================================== */
41
42 void inflate_MCOL(){
43 int i, oldmax = MCOL;
44 MCOL += 64;
45 xx = (PREAL*)myrealloc(xx, (MCOL * sizeof(PREAL)));
46 nn = (int*)myrealloc(nn, (MCOL * sizeof(int)));
47 vn = (int*)myrealloc(vn, (MCOL * sizeof(int)));
48 acol = (int*)myrealloc(acol, (MCOL * sizeof(int)));
49 x_read = (BOOLEAN*)myrealloc(x_read, (MCOL * sizeof(BOOLEAN)));
50 alias = (char**)myrealloc(alias, (MCOL * sizeof(char*)));
51 tmpptr = (FILE**)myrealloc(tmpptr, (MCOL * sizeof(FILE*)));
52 labelcol = (short int*)myrealloc(labelcol, (MCOL * sizeof(short int)));
53 names = (Labels**)myrealloc(names, (MCOL * sizeof(Labels*)));
54
55 for (i = oldmax; i < MCOL; i++){
56 xx[i] = NULL;
57 nn[i] = 0;
58 vn[i] = 0;
59 acol[i] = 0;
60 x_read[i] = FALSE;
61 alias[i] = NULL;
62 tmpptr[i] = NULL;
63 alias[i] = NULL;
64 names[i] = NULL;
65 }
66 }
67
68 char * get_default_label(int i){
69 char * newlabel;
70 char * tempLabel = (char*)mycalloc(4, sizeof(char));
71 if((i+CHAR_OFFSET) <= 'z'){
72 tempLabel[0] = (char)(i+CHAR_OFFSET);
73 } else{
74 tempLabel[0] = (char)(((i + 26) / 26) + CHAR_OFFSET - 2);
75 tempLabel[1] = (char)((i % 26) + CHAR_OFFSET);
76 }
77 newlabel = (char*)mymalloc(sizeof(char) * (strlen(tempLabel) + 1));
78 strcpy(newlabel, tempLabel);
79 myfree(tempLabel);
80 return(newlabel);
81 }
82
83 void create_columns(int amount){
84 int i;
85 for(i = 0; i < amount; i++){
86 if(ncol == MCOL)
87 inflate_MCOL();
88 if(alias[ncol] == NULL)
89 alias[ncol] = get_default_label(ncol);
90 tmpptr[ncol] = tmpfile();
91 if(tmpptr[ncol] == NULL){
92 out_err(FAT, ERR_FILE, ERR_LINE,
93 _("System reports error while opening temporary file:\n \"%s\""),
94 STRERROR(errno));
95 }
96 ncol++;
97 }
98 }
99
100 /* Free allocated memory, but not the temporary file */
101 void free_column(int i){
102 if((x_read[i])){
103 myfree(xx[i]);
104 xx[i] = NULL;
105 x_read[i] = FALSE;
106 vn[i] = 0;
107 }
108 }
109
110 /* Free allocated memory and erase temporary file */
111 void delete_column(int i){
112 free_column(i);
113 if(alias[i]) {
114 myfree(alias[i]);
115 alias[i] = NULL;
116 }
117 names[i] = NULL;
118 if(tmpptr[i]){
119 FCLOSE(tmpptr[i]);
120 tmpptr[i] = NULL;
121 }
122 nn[i] = 0;
123 x_read[i] = FALSE;
124 labelcol[i] = 0;
125 ncol--;
126 }
127
128 void erasetempfiles() {
129 int i;
130 if(MCOL == 0)
131 return;
132 out_d(_("Removing temporary files ...") );
133 for(i = 0; i < MCOL; i++)
134 delete_column(i);
135 myfree(xx);
136 myfree(alias);
137 myfree(nn);
138 myfree(vn);
139 myfree(acol);
140 myfree(x_read);
141 myfree(tmpptr);
142 myfree(labelcol);
143 myfree(names);
144 xx = NULL;
145 alias = NULL;
146 nn = NULL;
147 vn = NULL;
148 acol = NULL;
149 x_read = NULL;
150 tmpptr = NULL;
151 labelcol = NULL;
152 names = NULL;
153 out_d(_(" done\n") );
154 n_lab = 0;
155 ncol = 0;
156 MCOL = 0;
157 }
158
159
160 /* ==================================================================== */
161
162
/* Adapted from GNU coreutils 5.2.1 getndelim2.c
 *
 * Read one line (up to and including '\n', or up to EOF) from stream into
 * the buffer *lineptr, which currently holds *linesize bytes.  The buffer
 * is enlarged with myrealloc() in steps of 64 bytes when needed and both
 * *lineptr and *linesize are updated accordingly.
 *
 * Returns -1 if EOF is hit before any byte was read (the buffer is left
 * untouched); otherwise the number of bytes stored, counting the
 * terminating NUL.
 *
 * The original grew the buffer only when the last slot had already been
 * used, so the terminating NUL landed one byte past the end whenever a
 * line exactly filled the buffer. */
int get_line(char **lineptr, size_t *linesize, FILE *stream){
  size_t len = 0;  /* number of bytes stored so far */
  int c;

  for(;;){
    c = getc(stream);
    if(c == EOF){
      /* Return partial line, if any. */
      if(len == 0)
        return -1;
      break;
    }
    /* Keep room for this byte and for the terminating NUL. */
    if(len + 2 > *linesize){
      *linesize += 64;
      *lineptr = myrealloc(*lineptr, *linesize);
    }
    (*lineptr)[len] = (char)c;
    len++;
    if(c == '\n')
      break;  /* Return the line. */
  }
  (*lineptr)[len] = '\0';
  return (int)(len + 1);
}
195
196 /* =================================================================== */
197
198 void attach_labels_to_columns(){
199 int i;
200 Labels *ptr = first_labels;
201 for(i = 0; i < ncol; i++)
202 names[i] = NULL;
203 while(ptr){
204 for(i = 0; i < ncol; i++){
205 if(strcmp(alias[i], ptr->clabel) == 0){
206 names[i] = ptr;
207 break;
208 }
209 }
210 ptr = ptr->next;
211 }
212 }
213
214 void delete_labels(Labels *ptr){
215 int i;
216 Labels *p;
217 if(ptr == first_labels){
218 first_labels = first_labels->next;
219 } else{
220 p = first_labels;
221 while(p->next != ptr)
222 p = p->next;
223 p->next = ptr->next;
224 }
225 if(ptr->clabel)
226 myfree(ptr->clabel);
227 if(ptr->ctitle)
228 myfree(ptr->ctitle);
229 if(ptr->n > 0){
230 for(i = 0; i < ptr->n; i++)
231 myfree(ptr->l[i]);
232 }
233 if(ptr->v)
234 myfree(ptr->v);
235 if(ptr->l)
236 myfree(ptr->l);
237 myfree(ptr);
238 }
239
240 /* Delete problematic Labels */
241 void check_labels(){
242 Labels *next, *ptr = first_labels;
243 while(ptr){
244 next = ptr->next;
245 if(ptr->clabel == NULL || (ptr->ctitle == NULL && ptr->n == 0))
246 delete_labels(ptr);
247 ptr = next;
248 }
249 }
250
251 /* Creates a linked list of "Labels". The list of Labels might have labels for
252 * columns that don't exist in the current datafile. */
253 void read_labels(char *labelsfile){
254 BOOLEAN getting_labels = FALSE;
255 Labels *ptr = NULL;
256 FILE *F;
257 int i, max = 0;
258 char b[255], *s, t[255];
259 FOPEN(labelsfile, "r", F);
260 while(fgets(b, 254, F)){
261 s = b;
262 while(s[0] == ' ' || s[0] == '\t')
263 s++;
264 if(strlen(s) < 2){
265 if(getting_labels){
266 getting_labels = FALSE;
267 }
268 continue;
269 }
270 if(s[0] == '#')
271 continue;
272 i = 0;
273 if(!getting_labels){
274 if(ptr == NULL){
275 first_labels = (Labels*)mycalloc(1, sizeof(Labels));
276 ptr = first_labels;
277 } else{
278 ptr->next = (Labels*)mycalloc(1, sizeof(Labels));
279 ptr = ptr->next;
280 }
281 while(!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')){
282 t[i] = s[i];
283 i++;
284 }
285 t[i] = 0;
286 ptr->clabel = (char*)mymalloc((strlen(t) + 1) * sizeof(char));
287 strcpy(ptr->clabel, t);
288 s += i;
289 while(s[0] == ' ' || s[0] == '\t')
290 s++;
291 i = 0;
292 while(!(s[i] == '\n' || s[i] == '\r')){
293 t[i] = s[i];
294 i++;
295 }
296 t[i] = 0;
297 if(strlen(t) > 1){
298 ptr->ctitle = (char*)mymalloc((strlen(t) + 1) * sizeof(char));
299 strcpy(ptr->ctitle, t);
300 }
301 max = 0;
302 ptr->n = 0;
303 getting_labels = TRUE;
304 } else{
305 if(ptr->n == max){
306 max += 10;
307 ptr->v = (REAL*)myrealloc(ptr->v, (max * sizeof(REAL)));
308 ptr->l = (char**)myrealloc(ptr->l, (max * sizeof(char*)));
309 }
310 while(!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')){
311 t[i] = s[i];
312 i++;
313 }
314 t[i] = 0;
315 if(sscanf(t, "%lf", &(ptr->v[ptr->n])) == 1){
316 s += i;
317 while(s[0] == ' ' || s[0] == '\t')
318 s++;
319 i = 0;
320 while(!(s[i] == '\n' || s[i] == '\r')){
321 t[i] = s[i];
322 i++;
323 }
324 t[i] = 0;
325 i = strlen(t);
326 if(i > 1){
327 ptr->l[ptr->n] = (char*)mymalloc((i+1) * sizeof(char));
328 strcpy(ptr->l[ptr->n], t);
329 ptr->n++;
330 } else
331 continue;
332 } else
333 continue;
334 }
335 }
336 FCLOSE(F);
337 check_labels();
338 if(first_labels == NULL)
339 out_err(ERR, ERR_FILE, ERR_LINE,
340 _("No labels found in \"%s\"!"), labelsfile);
341 else
342 attach_labels_to_columns();
343 }
344
345 void set_fileformat(){
346 char answer[80];
347 int status = 1;
348
349 out_i(_("Does the file contain the column names? (%s) "), _("y/n"));
350 GETRLINE;
351 status = sscanf(line, "%s", answer);
352 if (status == 0)
353 return;
354 if(answer[0] == _("y")[0] || answer[0] == _("Y")[0]){
355 has_header = TRUE;
356 noheader = FALSE;
357 } else
358 /* FIXME: can't translate "N" because this string is already used in
359 * Frequency table and Compare means */
360 if(answer[0] == _("n")[0] || answer[0] == 'N'){
361 has_header = FALSE;
362 noheader = TRUE;
363 } else
364 return;
365
366 do{
367 out_i(_("Decimal delimiter [%c]: "), dec);
368 GETBLINE;
369 status = sscanf(line, "%s\n", answer);
370 if (status == 0)
371 return;
372 if(answer[0] == ',' || answer[0] == '.')
373 dec = answer[0];
374 else
375 out_err(WAR, ERR_FILE, ERR_LINE,
376 _("Invalid decimal delimiter: '%c'. Please, choose either ',' or '.'"), answer[0]);
377 } while(!(answer[0] == ',' || answer[0] == '.'));
378
379 do{
380 if(sep){
381 if(sep == '\t')
382 strcpy(answer, "\\t");
383 else
384 sprintf(answer, "%c", sep);
385 } else
386 sprintf(answer, " ,;\\t");
387 out_i(_("Field separator ( \\t,;)[%s]: "), answer);
388 GETBLINE;
389 status = sscanf(line, "%s\n", answer);
390 if(line[0] == ' ')
391 strcpy(answer, " ");
392 if (status == 0)
393 return;
394 if(answer[0] == ',' || answer[0] == ';' || answer[0] == ' ')
395 sep = answer[0];
396 else
397 if(strcmp(answer, "\\t") == 0)
398 sep = '\t';
399 else{
400 out_err(WAR, ERR_FILE, ERR_LINE, _("Invalid field separator: '%c'"), answer[0]);
401 status = 0;
402 }
403 } while(status == 0);
404
405 out_i(_("What string indicates missing values? [%s]: "), NODATA);
406 GETNLINE;
407 if(!empty){
408 status = sscanf(line, "%s\n", answer);
409 if (status == 0)
410 return;
411 myfree(NODATA);
412 NODATA = (char*) mymalloc((strlen(answer) + 1) * sizeof(char));
413 strcpy(NODATA, answer);
414 }
415 }
416
417 void show_file_head(char *fn){
418 FILE *source;
419 char *aline;
420 char *b;
421 int i, rlen;
422 size_t blen=64;
423
424 FOPEN(fn, "rt", source);
425 aline = (char*)mymalloc(blen);
426 rlen = get_line(&aline, &blen, source);
427 i = 0;
428
429 out_i(_("First lines of \"%s\":"), fn);
430 out_d("\n\n");
431 set_winsize();
432 b = (char*)m_calloc(SCRCOLS + 4, sizeof(char));
433 while (rlen > -1 && i < 10){
434 snprintf(b, SCRCOLS, "%s", aline);
435 b[SCRCOLS - 3] = '\n';
436 b[SCRCOLS - 2] = 0;
437 out_d(" %s", b);
438 rlen = get_line(&aline, &blen, source);
439 i++;
440 }
441 out_d("\n\n");
442 FCLOSE(source);
443 myfree(aline);
444 }
445
/* Delete every double-quote character from the string s, in place. */
void remove_quotes(char *s){
  const char *in;
  char *out = s;

  for(in = s; *in != '\0'; in++){
    if(*in == '"')
      continue;
    *out = *in;
    out++;
  }
  *out = 0;
}
458
459 int parsecomment(const char *theline, BOOLEAN is_comment) {
460 int new = 0, j, n, is_rpt;
461 char *s, *token, *comment;
462 char ignore[] = " ,;\"\n\t";
463 char *var_id;
464
465 if(is_comment){
466 var_id = (char*)mymalloc(sizeof(char) * 3);
467 strcpy(var_id, "#%");/* this char indicates the line contains labels */
468 } else{
469 var_id = (char*)m_calloc(1, 1);
470 var_id[0] = 0;
471 }
472
473 comment = (char*)m_calloc(sizeof(char), (strlen(theline) + 1));
474 strcpy(comment, theline);
475
476 if (strstr(comment, var_id)!=comment) { /* no valid Var - comment */
477 if(strstr(comment, "#!")==comment && strcmp(var_id,"#!")!=0 )
478 out_err(WAR, ERR_FILE, ERR_LINE,
479 _("'#!' is an illegal indicator of a column of labels.") );
480 return -1;
481 }
482 s = comment+strlen(var_id); /* jump over var_id */
483 n = ncol;
484 while ((token = strtok(s, ignore))!= NULL) {
485 s = NULL;
486 if (token[0]=='$') {
487 if(labelcol == NULL)
488 inflate_MCOL();
489 labelcol[n_lab] = n;
490 n_lab++;
491 out_d(_("Label in column %i='%s'\n"), (n+1), token);
492 }
493 else {
494 if(n >= MCOL)
495 inflate_MCOL();
496 if(alias[n] != NULL)
497 myfree(alias[n]);
498 is_rpt = FALSE; /* avoiding repeated labels */
499 for (j=0; j <n; j++)
500 if (strcmp(alias[j], token) == 0)
501 is_rpt = TRUE;
502 if (is_rpt){
503 alias[n] = (char*) mymalloc((strlen(token)+2) * sizeof(char));
504 strcpy(alias[n], token);
505 alias[n][strlen(token)] = '_';
506 alias[n][strlen(token)+1] = 0;
507 }
508 else{
509 alias[n] = (char*) mymalloc((strlen(token)+1) * sizeof(char));
510 strcpy(alias[n], token);
511 }
512 n++;
513 new++;
514 }
515 }
516
517 if (new == 0 && is_comment) {
518 if(silent)
519 out_err(FAT, ERR_FILE, ERR_LINE, _("No variables found in comment!"));
520 out_err(ERR, ERR_FILE, ERR_LINE, _("No variables found in comment!"));
521 }
522
523 return new; /* number of new labels (not counting number of $labels) */
524 }
525
526 /* ==================================================================== */
527
528 void delete_last_columns(int i){
529 int j;
530 for(j = 0; j < i; j++){
531 delete_column(ncol - 1);
532 }
533 }
534
/* Replace every occurrence of character c in the string s by '.'. */
void put_dots(char * s, char c){
  char *p;

  for(p = s; *p != '\0'; p++){
    if(*p == c)
      *p = '.';
  }
}
542
543 void clean_the_line(char *s){
544 char *b = s;
545 int i = 0;
546 while(*s){
547 if(*s == '"')
548 i = !i;
549 if(i && *s == ',')
550 *s = '.';
551 s++;
552 }
553
554 /* If (dec == ',' && sep == ','), the data is quoted and, thus, there is no
555 * need of replacing commas with dots because this was already done. */
556 if(dec == ',' && sep != ',')
557 put_dots(b, dec);
558 /* If sep was defined, and the missing values are quoted, the token will be
559 * "\"M\"" */
560 if(sep)
561 remove_quotes(b);
562 }
563
564 void finish_readingsource(FILE *F, char *s, BOOLEAN try_again){
565 FCLOSE(F);
566 myfree(s);
567 #ifndef NO_GETTEXT
568 RESET_LOCALE;
569 #endif
570
571 /* If ask_fileformat == TRUE, the user was already asked about the file format */
572 char answer[80];
573 static int attempts = 0;
574 if(try_again && !silent && !ask_fileformat && attempts < 3){
575 out_d(_("Statist failed to open the file. Perhaps it\n"
576 "didn't detect the file format correctly.\n"));
577 out_i(_("Would you like to set the file format? (%s) "), _("Y/n"));
578 GETNLINE;
579 out_d("\n");
580 if(!empty){
581 sscanf(line, "%s", answer);
582 if(answer[0] == _("n")[0] || answer[0] == 'N'){
583 attempts = 0;
584 return;
585 }
586 }
587 attempts++;
588 show_file_head(sourcename);
589 set_fileformat();
590 readsourcefile();
591 }
592 attempts = 0;
593 }
594
595 void readsourcefile(){
596 FILE *source;
597 char *aline; /* current line, old line */
598 int i, j, newlabs=0, newcol=0, actcol, colread = 0, lread=0, i_lab=0, rlen;
599 size_t blen=64;
600 REAL test;
601 char ignore[]= " ,;\"\n\t\0", *ptr, *token = NULL;
602 BOOLEAN statist_labels = FALSE, header_OK = FALSE;
603 #ifndef NO_GETTEXT
604 SET_C_LOCALE;
605 #endif
606
607 if(ask_fileformat){
608 show_file_head(sourcename);
609 set_fileformat();
610 }
611
612 FOPEN(sourcename, "rt", source);
613
614 aline = (char*)mymalloc(blen);
615 rlen = get_line(&aline, &blen, source);
616 lread ++;
617 if(noheader){ /* Don't scan the first lines looking for a header */
618 while (rlen > -1 && (emptyline(aline) || aline[0] == COMMENT)){
619 rlen = get_line(&aline, &blen, source);
620 lread ++;
621 }
622 } else{
623
624 /* Seek column labels, skipping true commentaries and empty lines */
625 while (rlen > -1 && (emptyline(aline) || aline[0] == COMMENT)){
626 if(aline[0] == COMMENT && (aline[1] == '%' || aline[1] == '!')){
627 newlabs = parsecomment(aline, TRUE);
628 if(newlabs)
629 statist_labels = TRUE;
630 }
631 rlen = get_line(&aline, &blen, source);
632 lread ++;
633 }
634 if(rlen == -1){
635 if(silent){
636 out_err(FAT, ERR_FILE, ERR_LINE,
637 _("Couldn't find data in file \"%s\"!"), sourcename);
638 } else{
639 out_err(ERR, ERR_FILE, ERR_LINE,
640 _("Couldn't find data in file \"%s\"!"), sourcename);
641 finish_readingsource(source, aline, FALSE);
642 return;
643 }
644 }
645
646 /* Read the first line as if it contains the column names because the user
647 * passed the command line option --header */
648 if(!statist_labels && has_header){
649 newlabs = (parsecomment(aline, FALSE));
650 if(newlabs > 0)
651 header_OK = TRUE;
652 for(i = 0; i < newlabs; i++){
653 j = i + ncol;
654 if(!((alias[j][0] >= 'A' && alias[j][0] <= 'Z')
655 || (alias[j][0] >= 'a' && alias[j][0] <= 'z'))){
656 out_err(WAR, ERR_FILE, ERR_LINE,
657 _("Name of column %d doesn't begin with an ascii letter: \"%s\"."),
658 j+1, alias[j]);
659 break;
660 }
661 }
662 } else{
663 /* Read the first line as if it contains the column names, but drop the
664 * names if they don't appear to be valid ones. */
665 if(!statist_labels && detect_header){
666 newlabs = (parsecomment(aline, FALSE));
667 if(newlabs > 0){
668 header_OK = TRUE;
669 for(i = 0; i < newlabs; i++){
670 j = i + ncol;
671 if(!((alias[j][0] >= 'A' && alias[j][0] <= 'Z')
672 || (alias[j][0] >= 'a' && alias[j][0] <= 'z'))){
673 for(j = ncol; j < (ncol+newlabs); j++){
674 myfree(alias[j]);
675 alias[j] = NULL;
676 }
677 header_OK = FALSE;
678 newlabs = 0;
679 break;
680 }
681 }
682 }
683 }
684 }
685
686 if(newlabs > 0){
687 for(i = ncol; i < (ncol+newlabs); i++)
688 out_d(_("Column %i = %s\n"), i+1, alias[i]);
689 }
690
691 /* Read another line if we successfully read the column names, although
692 * the data file doesn't have the "#%" string. */
693 if(header_OK){
694 rlen = get_line(&aline, &blen, source);
695 lread ++;
696 if(verbose && detect_header && !has_header){
697 out_d("\n");
698 if(newlabs == 1)
699 out_d(_("One valid column name found!"));
700 else
701 out_d(_("Valid column names found!"));
702 out_d("\n\n");
703 }
704 }
705 } /* End of "if(noheader)" */
706
707 /* Parse the first line of data, but don't put the data in the temp files
708 yet. Just count the number of columns. */
709 char *linecopy = (char*)m_calloc(2, blen);
710 strcpy(linecopy, aline);
711 clean_the_line(linecopy);
712
713 ptr = linecopy;
714 if(sep){ /* A field separator was defined. */
715 newcol = 1;
716 j = strlen(ptr);
717 for(i = 0; i < j; i++)
718 if(ptr[i] == sep)
719 newcol++;
720 } else{
721 while ((token = strtok(ptr, ignore))!= NULL) {
722 ptr = NULL;
723 while ((i_lab<n_lab) && (newcol==labelcol[i_lab])) {
724 i_lab++;
725 token = strtok(ptr, ignore);
726 }
727 if (token==NULL) {
728 break;
729 }
730 if ((strcmp(token, NODATA) == 0) || (sscanf(token, "%lf", &test) == 1)) {
731 newcol++;
732 }
733 else {
734 if(silent){
735 out_err(FAT, ERR_FILE, ERR_LINE,
736 _("Illegal format of value '%s' in line %i!\n"
737 "Couldn't read file %s!"), token, lread, sourcename);
738 } else{
739 out_err(ERR, ERR_FILE, ERR_LINE,
740 _("Illegal format of value '%s' in line %i!\n"
741 "Couldn't read file %s!"), token, lread, sourcename);
742 finish_readingsource(source, aline, TRUE);
743 return;
744 }
745 }
746 }
747 }
748
749 /* Check whether n_columns == n_labels */
750 if ((newlabs != 0) && (newlabs != newcol)) {
751 if(silent){
752 out_err(FAT, ERR_FILE, ERR_LINE,
753 _("Number of columns (%d) does not equal number of labels (%d)!"),
754 newcol, newlabs);
755 } else{
756 out_err(ERR, ERR_FILE, ERR_LINE,
757 _("Number of columns (%d) does not equal number of labels (%d)!"),
758 newcol, newlabs);
759 finish_readingsource(source, aline, TRUE);
760 return;
761 }
762 }
763
764 create_columns(newcol);
765
766 /* Finally, read data */
767 out_d(_("Reading %i columns ...\n"), newcol);
768 BOOLEAN endofline;
769 if(sep)
770 token = (char*)mymalloc(256);
771 do {
772 if ((!emptyline(aline)) && (aline[0]!=COMMENT)) {
773 colread = 0;
774 i_lab = 0;
775 clean_the_line(aline);
776 ptr = aline;
777 j = 0;
778 endofline = FALSE;
779
780 while (!endofline) {
781 if(sep){
782 i = 0;
783 while(!(*ptr == sep || *ptr == '\n')){
784 token[i] = *ptr;
785 i++;
786 ptr++;
787 }
788 token[i] = 0;
789 if(i == 0)
790 strcpy(token, NODATA);
791 if(*ptr == '\n'){
792 endofline = TRUE;
793 }
794 ptr++;
795 actcol = j + (ncol-newcol);
796 } else{
797 if((token = strtok(ptr, ignore)) == NULL)
798 break;
799 ptr = NULL;
800 actcol = j + (ncol-newcol);
801 while ((i_lab<n_lab) && (j==labelcol[i_lab])) {
802 i_lab++;
803 token = strtok(ptr, ignore);
804 }
805 if (token==NULL) {
806 break;
807 }
808 }
809 if (j>=newcol) {
810 if(silent){
811 out_err(FAT, ERR_FILE, ERR_LINE,
812 _("Too many columns in row %i. (%d columns)"), lread, j+1);
813 } else{
814 out_err(ERR, ERR_FILE, ERR_LINE,
815 _("Too many columns in row %i. (%d columns)"), lread, j+1);
816 delete_last_columns(newcol);
817 finish_readingsource(source, aline, TRUE);
818 return;
819 }
820 }
821
822 if (strcmp(token, NODATA) == 0 || strcmp(token, "nan") == 0){
823 FWRITE(&SYSMIS, sizeof(REAL), 1, tmpptr[actcol]);
824 nn[actcol] ++;
825 colread ++;
826 }
827 else if (sscanf(token, "%lf", &test)==1) {
828 FWRITE(&test, sizeof(REAL), 1, tmpptr[actcol]);
829 nn[actcol] ++;
830 colread ++;
831 }
832 else {
833 if(silent){
834 out_err(FAT, ERR_FILE, ERR_LINE,
835 _("Illegal format of value '%s' in line %i!"), token, lread);
836 } else{
837 out_err(ERR, ERR_FILE, ERR_LINE,
838 _("Illegal format of value '%s' in line %i!"), token, lread);
839 delete_last_columns(newcol);
840 finish_readingsource(source, aline, TRUE);
841 return;
842 }
843 }
844 j++;
845 }
846 }
847
848 if (colread != newcol) {
849 if(silent){
850 out_err(FAT, ERR_FILE, ERR_LINE,
851 _("Row %i contains just %i instead of %i columns!"),
852 (lread), colread, newcol);
853 } else{
854 out_err(ERR, ERR_FILE, ERR_LINE,
855 _("Row %i contains just %i instead of %i columns!"),
856 (lread), colread, newcol);
857 delete_last_columns(newcol);
858 finish_readingsource(source, aline, TRUE);
859 return;
860 }
861 }
862 rlen = get_line(&aline, &blen, source);
863 lread ++;
864 } while (rlen != -1);
865
866 out_d(_("\nRead data sets: \n") );
867 for (j=0; j<newcol; j++) {
868 actcol = j + (ncol-newcol);
869 out_d(_("Column %s: %i\n"), alias[actcol], nn[actcol]);
870 }
871
872 finish_readingsource(source, aline, FALSE);
873 }
874
875 /* ==================================================================== */
876
877
878 void newsourcefile() {
879 char answer[3], newsourcename[80];
880 FILE *source;
881
882 ls();
883 out_i(_("Name of data file: ") );
884 GETRLINE;
885 sscanf(line, "%s", newsourcename);
886 out_d("\n\n");
887
888 while ((source = fopen(newsourcename,"rt")) == NULL)
889 {
890 out_i(_("File \"%s\" not found!\n"), newsourcename);
891 out_i(_("Please enter new file name: ") );
892 GETRLINE;
893 sscanf(line, "%s", newsourcename);
894 out_d("\n");
895 }
896 FCLOSE(source);
897 show_file_head(newsourcename);
898 if(ncol > 0){
899 out_i(_("Shall the old data be removed? (%s) "), _("y/N") );
900 GETNLINE;
901 if(!(empty)){
902 sscanf(line, "%s", answer);
903 if (answer[0] == _("y")[0] || answer[0] == _("Y")[0]) {
904 erasetempfiles();
905 }
906 }
907 }
908 if(sourcename)
909 myfree(sourcename);
910 sourcename = (char*) mycalloc(strlen(newsourcename) + 1, sizeof(char));
911 strcpy(sourcename, newsourcename);
912 readsourcefile();
913 if (log_set) {
914 fprintf(logfile, "-----------------------------------------------------\n");
915 fprintf(logfile,
916 _("\nNew source file: %s\n\n") , sourcename);
917 }
918 attach_labels_to_columns();
919 }
920
921
922 /* =================================================================== */
923
924 int getcols(int min, int max, BOOLEAN eraserow){
925 char salias[80];
926 int i, j, w, nc, nr;
927 BOOLEAN inputok, found;
928 salias[79] = 0;
929 if(ncol == 0)
930 return 0;
931 if(ncol < min){
932 if(ncol == 1)
933 strncpy(salias, _("but this data file has just 1 column!"), 79);
934 else
935 snprintf(salias, 80, _("but this data file has only %i columns!"), ncol);
936 out_err(ERR, ERR_FILE, ERR_LINE,
937 _("This analysis requires at least %i columns,\n %s"), min, salias);
938 return 0;
939 }
940
941 out_d("\n");
942 out_d(_("Columns: ") );
943 if(format_columns_out){
944 set_winsize();
945 out_d("\n");
946 w = 0;
947 for(j = 0; j < ncol; j++)
948 if(strlen(alias[j]) > w)
949 w = strlen(alias[j]);
950 w += 2;
951 nc = SCRCOLS / w;
952 nr = 1 + (ncol / nc);
953 if(ncol > nc && (ncol % nc) != 0)
954 nr++;
955 snprintf(salias, 80, "%%-%is", w);
956 for(i = 0; i < nr; i++){
957 for(j = 0; j < nc; j++){
958 w = nc * i + j;
959 if(w < ncol)
960 out_d(salias, alias[w]);
961 }
962 out_d("\n");
963 }
964 } else{
965 for (j = 0; j < ncol; j++) {
966 out_d("%s ", alias[j]);
967 }
968 }
969 out_d("\n");
970
971 i = 0;
972 do{
973 inputok = FALSE;
974 found = FALSE;
975 while (!inputok) {
976 if(max > 1)
977 out_i(_("Column for variable %i: "), (i+1));
978 else
979 out_i(_("Column name: "));
980 GETBLINE;
981 sscanf(line, "%s", salias);
982
983 if (strcmp(line, _(_ALL_)) == 0) {
984 if(max < ncol){
985 out_err(ERR, ERR_FILE, ERR_LINE,
986 _("Please, choose at most %i columns!"), max);
987 break;
988 }
989 for (j = 0; j < ncol; j++) {
990 acol[j] = j;
991 }
992 alloc_cols(ncol, eraserow);
993 return ncol;
994 }
995
996 /* check if column name is matched exactly */
997 for (j=0; j<ncol; j++) {
998 if (strcmp(alias[j], salias)==0) {
999 acol[i] = j;
1000 inputok = TRUE;
1001 i++;
1002 break;
1003 }
1004 }
1005 if (inputok) {
1006 break; /* exact column alias entered -> go on */
1007 }
1008
1009 /* try to complete entered column alias */
1010 for (j=0; j<ncol; j++) {
1011 if (str_in_str(alias[j], salias)) {
1012 if (found) {
1013 out_err(ERR, ERR_FILE, ERR_LINE,
1014 _("Column name '%s' is not unique!"), salias);
1015 inputok = FALSE;
1016 i--;
1017 break;
1018 }
1019 else {
1020 found = TRUE;
1021 inputok = TRUE;
1022 acol[i] = j;
1023 i++;
1024 }
1025 }
1026 }
1027 if ((!inputok) && (!found)) {
1028 out_err(ERR, ERR_FILE, ERR_LINE,
1029 _("Column %s does not exist!"), salias);
1030 }
1031 else if ((!inputok) && (found)) {
1032 found = FALSE;
1033 }
1034 }
1035 } while(!empty && i < max);
1036
1037 if(i < min){
1038 if(i > 0)
1039 out_err(ERR, ERR_FILE, ERR_LINE,
1040 _("At least %i columns have to be selected!"), min);
1041 return 0;
1042 }
1043 if(eraserow == 1 && !(equal_rows(i))){
1044 out_err(ERR, ERR_FILE, ERR_LINE, _("The columns must have "
1045 "the same number of data points for this analysis!"));
1046 return 0;
1047 }
1048
1049 out_d("\n");
1050 if (log_set) {
1051 fprintf(logfile, "-----------------------------------------------------------\n\n");
1052 }
1053
1054 alloc_cols(i, eraserow);
1055 return(i);
1056 }
1057
1058 void printcols() {
1059 int i, j, k, n, r, p, q, w;
1060 char b[50], b2[50], *header;
1061 BOOLEAN labelfound = FALSE;
1062 b[49] = 0;
1063
1064 /* Choosing columns */
1065 set_winsize();
1066 k = (SCRCOLS / 11) - 1;
1067 if(ncol < k){ /* show all columns if the screen is width enough */
1068 for (j = 0; j < ncol; j++) {
1069 acol[j] = j;
1070 }
1071 alloc_cols(ncol, 3);
1072 n = ncol;
1073 } else{
1074 n = getcols(1, ncol, 3);
1075 }
1076 if(n == 0)
1077 return;
1078
1079 /* Determining number of rows */
1080 k = nn[acol[0]];
1081 j = strlen(alias[acol[0]]) + 10;
1082 for(i = 1; i < n; i++){
1083 j += strlen(alias[acol[i]]);
1084 if(nn[acol[i]] > k)
1085 k = nn[acol[i]];
1086 }
1087
1088 /* Creating header */
1089 if(j < ((n + 1) * 16))
1090 j = (n + 1) * 16;
1091 header = (char*)m_calloc(j, sizeof(char));
1092 strcpy(header, " ");
1093 for(i = 0; i < n; i++){
1094 if(names[acol[i]] && names[acol[i]]->n > 0)
1095 snprintf(b, 50, "%-10s ", alias[acol[i]]);
1096 else
1097 snprintf(b, 50, "%10s ", alias[acol[i]]);
1098 strcat(header, b);
1099 }
1100 strcat(header, "\n");
1101
1102 /* Printing data */
1103 out_r(_("Data from columns:\n"));
1104 colorize(ClHeader);
1105 out_r(header);
1106 colorize(ClDefault);
1107 j = 0;
1108 p = 2; /* already printed lines = column_names + wait_message */
1109 int sz;
1110 while(j < k){
1111 colorize(ClLineNum);
1112 out_r("%5i: ", (j + 1));
1113 colorize(ClDefault);
1114 for(i = 0; i < n; i++){
1115 if(j < nn[acol[i]]){
1116 if(xx[acol[i]][j] == SYSMIS)
1117 out_r("%10c ", '.');
1118 else{
1119 if(names[acol[i]])
1120 for(q = 0; q < names[acol[i]]->n; q++)
1121 if(names[acol[i]]->v[q] == xx[acol[i]][j]){
1122 labelfound = TRUE;
1123 strncpy(b, names[acol[i]]->l[q], 10);
1124 b[10] = 0;
1125 sz = 10;
1126 if(is_utf8){
1127 /* If there are non-ascii chars, we truncated the label prematurely*/
1128 while(stringLen(b) < 10 && sz < 40){
1129 sz++;
1130 strncpy(b, names[acol[i]]->l[q], sz);
1131 }
1132
1133 /* Avoiding truncating multibyte char, at least in Latin 1. */
1134 w = strlen(b);
1135 if(b[w-1] == (char)0xC3){
1136 sz++;
1137 strncpy(b, names[acol[i]]->l[q], sz);
1138 }
1139 sz = 10 + strlen(b) - stringLen(b);
1140 }
1141
1142 sprintf(b2, "%%-%ds ", sz);
1143 out_r(b2, b);
1144 }
1145 if(labelfound)
1146 labelfound = FALSE;
1147 else
1148 out_r("%10g ", xx[acol[i]][j]);
1149 }
1150 } else{
1151 out_r("%10s ", " ");
1152 }
1153 }
1154 out_r("\n");
1155 p++;
1156 if (p == (SCRLINES - 1) && !(silent)){
1157 p = 2;
1158 out_i(_("---> Please, choose: <RETURN> to continue,\n"
1159 " <Any letter> to stop, or a row number: ") );
1160 GETNLINE
1161 if(!empty){
1162 if((line[0] >= 'a' && line[0] <= 'z')
1163 || (line[0] >= 'A' && line[0] <= 'z'))
1164 return;
1165 r = getint();
1166 if(r > 0)
1167 j = r - 2;
1168 }
1169 colorize(ClHeader);
1170 out_r(header);
1171 colorize(ClDefault);
1172 }
1173 j++;
1174 }
1175 }
1176
1177 void printcol(REAL x[], int n) {
1178 int i, k;
1179 out_r(_("Data from column \"%s\":\n"), get_label(x));
1180 for (i=0; i<n; i++) {
1181 k=i+1;
1182 if (x[i] == SYSMIS)
1183 out_r("%5i.) %s\n", k, NODATA);
1184 else
1185 out_r("%5i.) %g\n", k, x[i]);
1186 if ((i+1) % (SCRLINES - 1) == 0) {
1187 mywait();
1188 if (!empty) {
1189 return;
1190 }
1191 }
1192 }
1193 out_r("-------------------------------------------\n\n");
1194 }
1195
1196 /* =================================================================== */
1197
1198
1199 PREAL readcol(int i) {
1200 PREAL px;
1201
1202 if (nn[i] == 0) {
1203 out_err(FAT, ERR_FILE, ERR_LINE,
1204 _("Column %i does not exist!"), i+1);
1205 }
1206 px = (REAL*)mycalloc(nn[i], sizeof(REAL));
1207 rewind(tmpptr[i]);
1208 FREAD(px, sizeof(REAL), nn[i], tmpptr[i]);
1209 x_read[i] = TRUE;
1210 return px;
1211 }
1212
1213 /* ==================================================================== */
1214
1215
1216 void alloc_cols(int n_alloc, BOOLEAN eraserow) {
1217 int k;
1218 int cr = 0; /* current row */
1219 int tr = 0; /* total number of rows already checked */
1220 BOOLEAN RowHasMis = FALSE;
1221
1222 /* delete all columns from memory */
1223 for (k=0; k<MCOL; k++){
1224 if((x_read[k])){
1225 free_column(k);
1226 }
1227 }
1228
1229 /* put selected columns in memory */
1230 for (k=0; k<n_alloc; k++)
1231 if (!x_read[acol[k]]){
1232 xx[acol[k]] = readcol(acol[k]);
1233 }
1234
1235 /* Delete rows with missing values or simply delete missing values */
1236 /* if eraserow == 3, do nothing */
1237 if (eraserow == TRUE){
1238 while(tr < nn[acol[0]]){
1239 for (k=0; k<n_alloc; k++)
1240 if(xx[acol[k]][tr] == SYSMIS) RowHasMis = TRUE;
1241 if (RowHasMis){
1242 tr++;
1243 RowHasMis = FALSE;
1244 }
1245 else{
1246 for (k=0; k<n_alloc; k++)
1247 xx[acol[k]][cr] = xx[acol[k]][tr];
1248 cr++;
1249 tr++;
1250 }
1251 }
1252 for (k=0; k<n_alloc; k++)
1253 vn[acol[k]] = cr;
1254 out_r( _("%d rows with missing values were deleted for this analysis\n\n"),
1255 (nn[acol[0]] - cr));
1256 }
1257 else if (eraserow == FALSE) {
1258 for (k=0; k<n_alloc; k++){
1259 tr = 0;
1260 cr = 0;
1261 while (tr < nn[acol[k]]){
1262 if(xx[acol[k]][tr] == SYSMIS)
1263 tr++;
1264 else {
1265 xx[acol[k]][cr] = xx[acol[k]][tr];
1266 cr++;
1267 tr++;
1268 }
1269 }
1270 vn[acol[k]] = cr;
1271 out_r( _("Column %s: %d data points\n"),
1272 alias[acol[k]], cr);
1273 }
1274 }
1275
1276 if (log_set)
1277 for (k=0; k<n_alloc; k++)
1278 fprintf(logfile, _("Variable %i = Column %s\n"), (k+1), alias[acol[k]] );
1279
1280 /* rewinding of pointers to tmpfiles */
1281 for (k=0; k<n_alloc; k++)
1282 rewind(tmpptr[acol[k]]);
1283 }
1284
1285
1286 BOOLEAN make_new_col(char *analias, int n) {
1287 int i;
1288
1289 for (i=0; i<ncol; i++) {
1290 if (strcmp(analias, alias[i])==0) {
1291 out_err(ERR, ERR_FILE, ERR_LINE,
1292 _("Column %s exists already!"), analias);
1293 return FALSE;
1294 }
1295 }
1296 create_columns(1);
1297 if(alias[ncol - 1])
1298 myfree(alias[ncol - 1]);
1299 alias[ncol - 1] = (char*)mymalloc((strlen(analias)+1));
1300 strcpy(alias[ncol - 1], analias);
1301 out_r(_("New column %s created!\n"), alias[ncol - 1]);
1302 nn[ncol - 1] = n;
1303 return TRUE;
1304 }
1305
1306 /* =================================================================== */
1307
1308 int col_exist(char *analias, BOOLEAN is_error) {
1309 int i;
1310
1311 for ( i=0; i<ncol; i++ ) {
1312 if ( alias[i] && strcmp(analias, alias[i])==0) {
1313 if(is_error)
1314 out_err(ERR, ERR_FILE, ERR_LINE,
1315 _("Column %s exists already!"), analias);
1316 return i;
1317 }
1318 }
1319 return -1;
1320 }
1321
1322 /* =================================================================== */
1323
1324 /* =================================================================== */
1325
1326 #ifndef STATIST_X
1327 char *get_label(PREAL x) {
1328 int i;
1329
1330 for (i=0; i<ncol; i++) {
1331 if (x == xx[i]) {
1332 if(names[i] && names[i]->ctitle)
1333 return names[i]->ctitle;
1334 else
1335 return alias[i];
1336 }
1337 }
1338 out_err(ERR, ERR_FILE, ERR_LINE,
1339 _("No label found for column!") );
1340 return NULL;
1341 }
1342 #endif
1343
1344 char *get_name(PREAL x) {
1345 int i;
1346
1347 for (i=0; i<ncol; i++) {
1348 if (x == xx[i] && alias[i]){
1349 return alias[i];
1350 }
1351 }
1352 out_err(ERR, ERR_FILE, ERR_LINE,
1353 _("No name found for column!") );
1354 return NULL;
1355 }
1356
1357 void log_transform() {
1358 char analias[80];
1359 PREAL y;
1360 int i, n = 0;
1361
1362 out_i(_("Please select column for log-transformation\n") );
1363 i = getcols(1, 1, 3);
1364 if(i == 0)
1365 return;
1366 strncpy(analias, "log_", 79);
1367 strncat(analias, alias[acol[0]], 79-strlen(analias));
1368 if(col_exist(analias, TRUE) != -1)
1369 return;
1370 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1371 for (i=0; i<nn[acol[0]]; i++) {
1372 if (xx[acol[0]][i] > 0.0)
1373 y[i] = log10(xx[acol[0]][i]);
1374 else{
1375 y[i] = SYSMIS;
1376 if(xx[acol[0]][i] != SYSMIS)
1377 n++;
1378 }
1379 }
1380
1381 if(n == 1)
1382 out_err(MWA, ERR_FILE, ERR_LINE, _("One value was less or equal to zero"
1383 " and was transformed into missing value!"));
1384 if(n > 1)
1385 out_err(MWA, ERR_FILE, ERR_LINE, _("%i values were less or equal to zero"
1386 " and were transformed into missing values!"), n);
1387
1388 if (!(make_new_col(analias, nn[acol[0]]))) {
1389 return;
1390 }
1391 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1392 }
1393
1394
1395 void ln_transform() {
1396 char analias[80];
1397 PREAL y;
1398 int i, n = 0;
1399
1400 out_i(_("Please select column for log-transformation\n") );
1401 i = getcols(1, 1, 3);
1402 if(i == 0)
1403 return;
1404 strncpy(analias, "ln_", 79);
1405 strncat(analias, alias[acol[0]], 79-strlen(analias));
1406 if(col_exist(analias, TRUE) != -1)
1407 return;
1408 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1409 for (i=0; i<nn[acol[0]]; i++) {
1410 if (xx[acol[0]][i] > 0.0)
1411 y[i] = log(xx[acol[0]][i]);
1412 else{
1413 y[i] = SYSMIS;
1414 if(xx[acol[0]][i] != SYSMIS)
1415 n++;
1416 }
1417 }
1418
1419 if(n == 1)
1420 out_err(MWA, ERR_FILE, ERR_LINE, _("One value was less or equal to zero"
1421 " and was transformed into missing value!"));
1422 if(n > 1)
1423 out_err(MWA, ERR_FILE, ERR_LINE, _("%i values were less or equal to zero"
1424 " and were transformed into missing values!"), n);
1425
1426 if (!(make_new_col(analias, nn[acol[0]]))) {
1427 return;
1428 }
1429 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1430 }
1431 /* =================================================================== */
1432
1433
1434 void power_10_transform() {
1435 char analias[80];
1436 PREAL y;
1437 int i;
1438
1439 out_i(_("Please select column for exponentiation\n") );
1440 i = getcols(1, 1, 3);
1441 if(i == 0)
1442 return;
1443 strncpy(analias, "10^_", 79);
1444 strncat(analias, alias[acol[0]], 79-strlen(analias));
1445 if(col_exist(analias, TRUE) != -1)
1446 return;
1447 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1448 for (i=0; i<nn[acol[0]]; i++) {
1449 if(xx[acol[0]][i] == SYSMIS)
1450 y[i] = SYSMIS;
1451 else
1452 y[i] = pow(10.0, xx[acol[0]][i]);
1453 }
1454
1455 if (!(make_new_col(analias, nn[acol[0]]))){
1456 return;
1457 }
1458 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1459 }
1460
1461 void power_e_transform() {
1462 char analias[80];
1463 PREAL y;
1464 int i;
1465
1466 out_i(_("Please select column for exponentiation\n") );
1467 i = getcols(1, 1, 3);
1468 if(i == 0)
1469 return;
1470 strncpy(analias, "e^_", 79);
1471 strncat(analias, alias[acol[0]], 79-strlen(analias));
1472 if(col_exist(analias, TRUE) != -1)
1473 return;
1474 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1475 for (i=0; i<nn[acol[0]]; i++) {
1476 if(xx[acol[0]][i] == SYSMIS)
1477 y[i] = SYSMIS;
1478 else
1479 y[i] = exp(xx[acol[0]][i]);
1480 }
1481
1482 if (!(make_new_col(analias, nn[acol[0]]))){
1483 return;
1484 }
1485 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1486 }
1487
1488
1489 /* =================================================================== */
1490
1491
1492 void inv_transform() {
1493 char analias[80];
1494 PREAL y;
1495 int i;
1496
1497 out_i(_("Please select column for inversion\n") );
1498 i = getcols(1, 1, 3);
1499 if(i == 0)
1500 return;
1501 strncpy(analias, "inv_", 79);
1502 strncat(analias, alias[acol[0]], 79-strlen(analias));
1503 if(col_exist(analias, TRUE) != -1)
1504 return;
1505 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1506 for (i=0; i<nn[acol[0]]; i++) {
1507 if(xx[acol[0]][i] == SYSMIS)
1508 y[i] = SYSMIS;
1509 else
1510 y[i] = 1./xx[acol[0]][i];
1511 }
1512
1513 if (!(make_new_col(analias, nn[acol[0]]))){
1514 return;
1515 }
1516 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1517 }
1518
1519
1520 /* =================================================================== */
1521
1522
1523 void z_transform() {
1524 char analias[80];
1525 PREAL y;
1526 REAL mean, sdv;
1527 int i;
1528
1529 out_i(_("Please select column for z-transformation\n") );
1530 i = getcols(1, 1, TRUE);
1531 if(i == 0)
1532 return;
1533 strncpy(analias, "z_", 79);
1534 strncat(analias, alias[acol[0]], 79-strlen(analias));
1535 if(col_exist(analias, TRUE) != -1)
1536 return;
1537 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1538 sdv = get_sdv(xx[acol[0]], nn[acol[0]]);
1539 mean = get_mean(xx[acol[0]], nn[acol[0]]);
1540 if(nn[acol[0]] != vn[acol[0]])
1541 alloc_cols(1, 3);
1542 for (i=0; i<nn[acol[0]]; i++) {
1543 if(xx[acol[0]][i] == SYSMIS)
1544 y[i] = SYSMIS;
1545 else
1546 y[i] = (xx[acol[0]][i]-mean)/sdv;
1547 }
1548
1549 if (!(make_new_col(analias, nn[acol[0]]))){
1550 return;
1551 }
1552 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1553 }
1554
1555
1556 /* =================================================================== */
1557
1558 void sort_col() {
1559 char analias[80];
1560 PREAL y;
1561 int i;
1562
1563 out_i(_("Please select column to be sorted\n") );
1564 i = getcols(1, 1, 3);
1565 if(i == 0)
1566 return;
1567 strncpy(analias, "sort_", 79);
1568 strncat(analias, alias[acol[0]], 79-strlen(analias));
1569 if(col_exist(analias, TRUE) != -1)
1570 return;
1571 y = (REAL*)m_calloc(nn[acol[0]], sizeof(REAL));
1572 for (i=0; i<nn[acol[0]]; i++) {
1573 y[i] = xx[acol[0]][i];
1574 }
1575 qsort(y, nn[acol[0]], sizeof(REAL), real_compar_up);
1576 if (!(make_new_col(analias, nn[acol[0]]))){
1577 return;
1578 }
1579 FWRITE(y, sizeof(REAL), nn[acol[0]], tmpptr[ncol - 1]);
1580 }
1581
1582
1583 /* =================================================================== */
1584
1585
1586 void readcol_from_term() {
1587 char aline[80], answer[10];
1588 int n=0;
1589 REAL temp;
1590 BOOLEAN ok, stop=FALSE;
1591
1592 if (ncol > 0) {
1593 out_i(_("Shall all data be deleted? (%s) "), _("y/N") );
1594 GETNLINE;
1595 if(!(empty)){
1596 sscanf(line, "%s", answer);
1597 if (answer[0] == _("y")[0] || answer[0] == _("Y")[0]) {
1598 erasetempfiles();
1599 }
1600 }
1601 }
1602
1603 out_i(_("Column %i is being read, stop input with '.'\n"), (ncol+1));
1604 aline[0] = '1';
1605 create_columns(1);
1606
1607 while (!stop) {
1608 ok = FALSE;
1609 while (!ok) {
1610 out_d(_("Value %i: "), (n+1));
1611 fgets(aline, 79, stdin);
1612 if ( (aline[0]=='.') && (strlen(aline)==2) ) {
1613 stop = TRUE;
1614 }
1615 if ( (sscanf(aline, "%lf", &temp)==1) || (stop) ) {
1616 ok = TRUE;
1617 }
1618 else {
1619 out_err(ERR, ERR_FILE, ERR_LINE,
1620 _("Illegal input, please repeat: ") );
1621 }
1622
1623 if ( (ok) && (!stop) ) {
1624 n++;
1625 FWRITE(&temp, sizeof(REAL), 1, tmpptr[ncol - 1]);
1626 }
1627 }
1628 }
1629 if (n>0) {
1630 nn[ncol - 1] = n;
1631 } else{
1632 delete_column(ncol - 1);
1633 }
1634 }
1635
1636
1637
1638 BOOLEAN str_in_str(const char *s1, const char *s2) {
1639 int i, n = strlen(s2);
1640
1641 for (i=0; i<n; i++) {
1642 if (s1[i] != s2[i]) {
1643 return FALSE;
1644 }
1645 }
1646 return TRUE;
1647 }
1648
1649
1650 BOOLEAN emptyline(const char *s) {
1651 int i, n = strlen(s);
1652
1653 for (i=0; i<n; i++) {
1654 if (!isspace((int)s[i])) {
1655 return FALSE;
1656 }
1657 }
1658 return TRUE;
1659 }
1660
1661
1662 BOOLEAN formatToken(char *token, char *result){
1663 int i = 0;
1664 REAL test;
1665
1666 /* Remove leading blanks */
1667 while(*token == ' ' && *token != 0)
1668 token++;
1669 if(*token == 0 || *token == '\r' || *token == '\n'){
1670 sprintf(result, "%s", NODATA);
1671 return FALSE;
1672 }
1673
1674 /* Get the token, including blanks */
1675 while(*token != 0){
1676 result[i] = *token;
1677 token++;
1678 i++;
1679 }
1680
1681 /* Remove trailing blanks */
1682 i--;
1683 while(result[i] == ' '){
1684 result[i] = 0;
1685 i--;
1686 }
1687 i++;
1688
1689 result[i] = 0;
1690 if(sscanf(result, "%lf", &test) == 1){
1691 if(test == floor(test) && test >= -9999999999999999.0 && test <= 9999999999999999.0)
1692 snprintf(result, 32, "%-16.16g", test);
1693 else
1694 snprintf(result, 32, "%-16.10g", test);
1695 token = result;
1696 while(*token != 0){
1697 token++;
1698 if(*token == ' '){
1699 *token = 0;
1700 break;
1701 }
1702 }
1703 return FALSE;
1704 } else{
1705 strcpy(token, result);
1706 sprintf(result, "\"%s\"", token);
1707 return TRUE;
1708 }
1709 }
1710
/* Print the help text for the --xcols command line option and terminate
 * the program with exit status 0.  name is the program name shown in the
 * usage line. */
void xcols_usage(char * name){
  char *program = name;

  out_d(_("\nThe option --xcols tells Statist to extract columns from a fixed "
          "width data file.\n\n"));
  out_d(_("\nUsage:\n"
          "%s --xcols config_file data_base dest_file\n\n"), program);
  exit(0);
}
1718
1719 void extract_cols(int argc, char *argv[]){
1720 FILE *f1, *f2;
1721 char b[1000];
1722 int i, j, k, n = 0, nrows = 0, rlen, max = 100, pos = 0, *begin, *end;
1723 size_t blen = 64;
1724 char *b1, **lbel, *token, *ftokn;
1725 unsigned int l;
1726 BOOLEAN *alpha;
1727
1728 #ifndef NO_GETTEXT
1729 SET_C_LOCALE;
1730 #endif
1731
1732 if(sep == 0)
1733 sep = ' ';
1734
1735 for(i = 1; i < argc; i++)
1736 if(strcmp(argv[i], "--xcols") == 0){
1737 pos = i;
1738 break;
1739 }
1740
1741 if((argc - pos) < 4)
1742 xcols_usage(argv[0]);
1743
1744 lbel = (char**) mymalloc(max * sizeof(char*));
1745 begin = (int*) mymalloc(max * sizeof(int));
1746 end = (int*) mymalloc(max * sizeof(int));
1747 alpha = (BOOLEAN*)mymalloc(max * sizeof(BOOLEAN));
1748 b1 = (char*) mymalloc(blen * sizeof(char));
1749
1750 /* read config_file */
1751 FOPEN(argv[pos + 1], "r", f1);
1752 rlen = get_line(&b1, &blen, f1);
1753 while (rlen != -1){
1754 if(b1[0] == '#' || strlen(b1) < 3){
1755 rlen = get_line(&b1, &blen, f1);
1756 continue;
1757 }
1758 i = 0;
1759 while(i < 997 && b1[i] != ' ' && b1[i] != '\t'){
1760 b[i] = b1[i];
1761 i++;
1762 }
1763 b[i] = 0;
1764 lbel[n] = (char*) mymalloc(i + 1);
1765 strcpy(lbel[n], b);
1766 while(!(b1[i] >= '0' && b1[i] <= '9'))
1767 i++;
1768 j = 0;
1769 while(b1[i] >= '0' && b1[i] <= '9'){
1770 b[j] = b1[i];
1771 i++; j++;
1772 }
1773 b[j] = 0;
1774 begin[n] = atoi(b) - 1;
1775 while(!((b1[i] >= '0' && b1[i] <= '9') || b1[i] == '\n'))
1776 i++;
1777 j = 0;
1778 while(b1[i] >= '0' && b1[i] <= '9'){
1779 b[j] = b1[i];
1780 i++; j++;
1781 }
1782 b[j] = 0;
1783 if(b[0])
1784 end[n] = atoi(b) - 1;
1785 else
1786 end[n] = begin[n];
1787 n++;
1788 if(n == max){
1789 max += 100;
1790 lbel = (char**) myrealloc(lbel, (max * sizeof(char*)));
1791 begin = (int*) myrealloc(begin, (max * sizeof(int)));
1792 end = (int*) myrealloc(end, (max * sizeof(int)));
1793 alpha = (BOOLEAN*) myrealloc(alpha, (max * sizeof(BOOLEAN)));
1794 }
1795 rlen = get_line(&b1, &blen, f1);
1796 }
1797 FCLOSE(f1);
1798
1799 j= 0;
1800 l = 3;
1801 for(i = 0; i < n; i++){
1802 alpha[i] = FALSE;
1803 j += strlen(lbel[i]) + 30;
1804 if(j > l)
1805 l = j;
1806 if((end[i] - begin[i] + 1) > l)
1807 l = end[i] - begin[i] + 1;
1808 }
1809 if(l < 128)
1810 l = 128;
1811 else
1812 l *= 3;
1813 token = (char*)mymalloc(l * sizeof(char));
1814 ftokn = (char*)mymalloc(l * sizeof(char));
1815
1816 /* read from origin, and write to destination */
1817 FOPEN(argv[pos + 2], "r", f1);
1818 FOPEN(argv[pos + 3], "w", f2);
1819 out_d(_("Extracting columns from \"%s\" to \"%s\"...\n"),
1820 argv[pos + 2], argv[pos + 3]);
1821 /* Don't put the "#%" string in the first line if the user doesn't seem
1822 * to use it. */
1823 if(!(has_header || detect_header))
1824 fprintf(f2, "#%%");
1825 for(i = 0; i < (n - 1); i++)
1826 fprintf(f2, "%s%c", lbel[i], sep);
1827 fprintf(f2, "%s\n", lbel[n-1]);
1828 rlen = get_line(&b1, &blen, f1);
1829 while (rlen != -1){
1830 for(i = 0; i < n; i++){
1831 k = 0;
1832 for(j = begin[i]; j <= end[i]; j++){
1833 token[k] = b1[j];
1834 k++;
1835 }
1836 token[k] = 0;
1837 if(formatToken(token, ftokn))
1838 alpha[i] = TRUE;
1839 if(i < (n - 1))
1840 fprintf(f2, "%s%c", ftokn, sep);
1841 else
1842 fprintf(f2, "%s\n", ftokn);
1843 }
1844 rlen = get_line(&b1, &blen, f1);
1845 nrows++;
1846 }
1847 FCLOSE(f1);
1848 FCLOSE(f2);
1849 myfree(b1);
1850 myfree(begin);
1851 myfree(end);
1852 out_d(_("Done: %d columns, %d rows.\n"), n, nrows);
1853 #ifndef NO_GETTEXT
1854 RESET_LOCALE;
1855 #endif
1856 j = 0;
1857 for(i = 0; i < n; i++)
1858 if(alpha[i])
1859 j = 1;
1860 if(j){
1861 out_err(WAR, ERR_FILE, ERR_LINE,
1862 _("Non-numeric values were found."));
1863 out_r(_("List of columns with non-numeric values:\n"));
1864 for(i = 0; i < n; i++)
1865 if(alpha[i])
1866 out_r(" %s", lbel[i]);
1867 out_r("\n");
1868 }
1869 for(i = 0; i < n; i++)
1870 myfree(lbel[i]);
1871 myfree(lbel);
1872 myfree(alpha);
1873 }
1874
/* Print the help text for the --xsample command line option and terminate
 * the program with exit status 1.  name is the program name shown in the
 * usage line. */
void xsample_usage(char * name){
  char *program = name;

  out_d(_("\nThe option --xsample tells Statist to extract a random sample of\n"
          "rows from a given data file.\n\n"));
  out_d(_("Usage:\n\n"
          "  %s --xsample percentage data_base dest_file\n\n"
          "where \"percentage\" is an integer between 1 and 99.\n\n"), program);
  exit(1);
}
1883
1884 void extract_sample(int argc, char * argv[]){
1885 int percent = -1;
1886 char *s;
1887 int i, k, n = 0, N = 0, rlen, pos = 0;
1888 FILE * f1;
1889 FILE * f2;
1890 size_t blen = 64;
1891 #ifndef NO_GETTEXT
1892 SET_C_LOCALE;
1893 #endif
1894
1895 for(i = 1; i < argc; i++)
1896 if(strcmp(argv[i], "--xsample") == 0){
1897 pos = i;
1898 break;
1899 }
1900
1901 if((argc - pos) < 4)
1902 xsample_usage(argv[0]);
1903 percent = atoi(argv[pos + 1]);
1904 if(percent > 99 || percent < 1){
1905 out_err(ERR, ERR_FILE, ERR_LINE,
1906 _("\"%s\" is not a valid value for percentage."), argv[pos + 1]);
1907 xsample_usage(argv[0]);
1908 }
1909
1910 s = (char*)mymalloc(blen);
1911
1912 /* read from source, and write to destine */
1913 srand(time(NULL));
1914 k = percent * 10;
1915 FOPEN(argv[pos + 2], "r", f1);
1916 FOPEN(argv[pos + 3], "w", f2);
1917
1918 out_r(_("Creating a new database with a random sample of approximately\n"
1919 "%i%% of \"%s\" rows...\n"), percent, argv[3]);
1920
1921 rlen = get_line(&s, &blen, f1);
1922 while(rlen != -1 && (s[0] == '#' || (s[0] >= 'A' && s[0] <= 'Z') ||
1923 (s[0] >= 'a' && s[0] <= 'z') || (s[0] == '"' &&
1924 ((s[1] >= 'A' && s[1] <= 'Z') || (s[1] >= 'a' && s[1] <= 'z'))))){
1925 fputs(s, f2);
1926 rlen = get_line(&s, &blen, f1);
1927 }
1928 while(rlen != -1){
1929 i = rand() % 1000;
1930 if(i < k){
1931 fputs(s, f2);
1932 n++;
1933 }
1934 rlen = get_line(&s, &blen, f1);
1935 N++;
1936 }
1937 FCLOSE(f1);
1938 FCLOSE(f2);
1939 myfree(s);
1940 out_r(_("Done: selected %d out of %d rows.\n"), n, N);
1941 #ifndef NO_GETTEXT
1942 RESET_LOCALE;
1943 #endif
1944 }
1945
1946 /* Export current database as fixed width data file */
1947 void exp_fwdf(){
1948 int i, j, k, *w;
1949 char *p, s[32], q[32], dfname[MLINE], cfname[MLINE];
1950 FILE *df, *cf;
1951 REAL r;
1952 if(ncol < 2){
1953 out_err(ERR, ERR_FILE, ERR_LINE,
1954 _("The current data file has less than 2 columns!"));
1955 return;
1956 }
1957 for(i = 1; i < ncol; i++)
1958 if(nn[0] != nn[i]){
1959 out_err(ERR, ERR_FILE, ERR_LINE,
1960 _("There are columns with different number of rows!"));
1961 return;
1962 }
1963
1964 /* Calculating the necessary width for each column */
1965 w = (int*)m_calloc(ncol, sizeof(int));
1966 for(i = 0; i < ncol; i++){
1967 acol[0] = i;
1968 alloc_cols(1, FALSE);
1969 for(j = 0; j < vn[i]; j++){
1970 r = xx[i][j];
1971 if(r == floor(r) && r >= -9999999999999999.0 && r <= 9999999999999999.0)
1972 snprintf(s, 32, "%16.16g", r);
1973 else
1974 snprintf(s, 32, "%16.10g", r);
1975 p = s;
1976 while(p[0] == ' ')
1977 p++;
1978 k = strlen(p);
1979 if(k > w[i])
1980 w[i] = k;
1981 }
1982 }
1983
1984 out_i(_("Please enter name of the export file: ") );
1985 GETRLINE;
1986 sscanf(line, "%s", dfname);
1987 out_i(_("Please enter name of the list of columns file: ") );
1988 GETRLINE;
1989 sscanf(line, "%s", cfname);
1990 FOPEN(dfname, "wt", df);
1991 FOPEN(cfname, "wt", cf);
1992 j = 1;
1993 k = 0;
1994
1995 /* saving the list of columns */
1996 for(i = 0; i < ncol; i++){
1997 k += w[i];
1998 fprintf(cf, "%s %i-%i\n", alias[i], j, k);
1999 j += w[i];
2000 }
2001 FCLOSE(cf);
2002 out_d(_("File \"%s\" saved!"), cfname);
2003 out_d("\n");
2004
2005 /* saving the fixed width datafile */
2006 #ifndef NO_GETTEXT
2007 SET_C_LOCALE;
2008 #endif
2009 k = sizeof(REAL);
2010 for(i = 0; i < nn[0]; i++){
2011 for(j = 0; j < ncol; j++){
2012 FREAD(&r, k, 1, tmpptr[j]);
2013 if(r == SYSMIS){
2014 sprintf(s, "%%%is", w[j]);
2015 fprintf(df, s, " ");
2016 } else{
2017 if(r == floor(r) && r >= -9999999999999999.0 && r <= 9999999999999999.0)
2018 snprintf(s, 32, "%16.16g", r);
2019 else
2020 snprintf(s, 32, "%16.10g", r);
2021 p = s;
2022 while(p[0] == ' ')
2023 p++;
2024 snprintf(q, 32, "%%%is", w[j]);
2025 fprintf(df, q, p);
2026 }
2027 }
2028 fprintf(df, "\n");
2029 }
2030 #ifndef NO_GETTEXT
2031 RESET_LOCALE;
2032 #endif
2033
2034 /* Finishing */
2035 FCLOSE(df);
2036 out_r(_("File \"%s\" saved!"), dfname);
2037 out_r("\n\n");
2038 }
2039