"Fossies" - the Fresh Open Source Software Archive 
Member "statist-1.4.2/src/procs.c" (15 Nov 2006, 83062 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "procs.c" see the
Fossies "Dox" file reference documentation.
1 /* This file is part of statist
2 **
3 ** It is distributed under the GNU General Public License.
4 ** See the file COPYING for details.
5 **
6 ** (c) 1997 Dirk Melcher
7 ** Doerper Damm 4
8 ** 49134 Wallenhorst
9 ** GERMANY
10 ** Tel. 05407/7636
11 ** email: Dirk.Melcher@usf.Uni-Osnabrueck.DE
12 **
13 ** small Changes by Bernhard Reiter http://www.usf.Uni-Osnabrueck.DE/~breiter
14 ** $Id: procs.c,v 1.35 2006/11/15 17:17:34 jakson Exp $
15 **
16 **
17 ** further changes: Andreas Beyer, 1999, abeyer@usf.uni-osnabrueck.de
18 ***************************************************************/
19
20 /* procs.c for STATIST */
21
22 #include <stdio.h>
23 #include <math.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include <limits.h>
27 #include <float.h>
28
29 #include "statist.h"
30 #include "funcs.h"
31 #include "plot.h"
32 #include "procs.h"
33 #include "data.h"
34 #include "gettext.h"
35
36
37 /* =================================================================== */
38
39
40 void part_corr(PREAL xx[], int nrow, int ncol) {
41 PREAL r[6], x, y;
42 REAL x_sdv, y_sdv, s_xy;
43 int i, k, df;
44 REAL r12s3, r12s4, r53s4, r25s4, r15s4, r12s5, r13s5,
45 r23s5, r12s34, r15s34, r25s34, r14s5, r24s5, r12s35, r12s45,
46 r12s345, r13s4, r23s4, t, alpha, a, r14s3, r13s2;
47 /*
48 calculate partial correlation;
49 at most 3 constant correlations
50 */
51
52 for (i=0; i<ncol; i++) {
53 r[i+1] = (REAL*)m_calloc((ncol+1), sizeof(REAL));
54 for (k=0; k<ncol; k++) {
55 x = xx[i];
56 y = xx[k];
57
58 s_xy = (get_xysum(x,y,nrow) - ((get_sum(x,nrow)*get_sum(y,nrow))/(REAL) nrow))/
59 (REAL) (nrow-1);
60 x_sdv = get_sdv(x,nrow);
61 y_sdv = get_sdv(y,nrow);
62 DIV(r[i+1][k+1], s_xy, (x_sdv*y_sdv));
63 }
64 }
65
66 DIV(r12s3, (r[1][2]-r[1][3]*r[2][3]), sqrt((1.-SQR(r[1][3]))*(1.-SQR(r[2][3]))));
67 DIV(r13s2, (r[1][3]-r[1][2]*r[3][2]),sqrt((1.-SQR(r[1][2]))*(1.-SQR(r[3][2]))));
68
69 out_start();
70 colorize(ClHeader);
71 out_r(_("\nResult partial correlation:\n") );
72 colorize(ClDefault);
73 out_r(" r12 = %f\n\n", r[1][2]);
74
75 out_r(_("One constant correlation:") );
76 out_r("\n r12_3 = %f diff= %f\n", r12s3, (r[1][2]-r12s3));
77 out_r(" r13_2 = %f diff= %f\n", r13s2, (r[1][3]-r13s2));
78
79 if (ncol >= 4) {
80 DIV(r12s4, (r[1][2]-r[1][4]*r[2][4]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[2][4]))));
81 DIV(r13s4, (r[1][3]-r[1][4]*r[3][4]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[3][4]))));
82 DIV(r23s4, (r[2][3]-r[2][4]*r[3][4]), sqrt((1.-SQR(r[2][4]))*(1.-SQR(r[3][4]))));
83
84 DIV(r14s3, (r[1][4]-r[1][3]*r[4][3]), sqrt((1.-SQR(r[1][3]))*(1.-SQR(r[4][3]))));
85
86 DIV(r12s34, (r12s4-r13s4*r23s4), sqrt((1.-SQR(r13s4))*(1.-SQR(r23s4))));
87
88 out_r(" r12_4 = %f diff= %f\n", r12s4, r[1][2]-r12s4);
89 out_r(" r13_4 = %f diff= %f\n", r13s4, r[1][3]-r13s4);
90
91 out_r(" r14_3 = %f diff= %f\n\n", r14s3, r[1][4]-r14s3);
92 out_r(_("Two constant correlations:"));
93 out_r("\n r12_34 = %f diff= %f\n", r12s34, r[1][2]-r12s34);
94
95 if (ncol == 5) {
96 DIV(r53s4, (r[5][3]-r[5][4]*r[3][4]), sqrt((1.-SQR(r[5][4]))*(1.-SQR(r[3][4]))));
97 DIV(r25s4, (r[2][5]-r[2][4]*r[4][5]), sqrt((1.-SQR(r[2][4]))*(1.-SQR(r[4][5]))));
98 DIV(r15s4, (r[1][5]-r[1][4]*r[4][5]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[4][5]))));
99 DIV(r12s5, (r[1][2]-r[1][5]*r[2][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[2][5]))));
100 DIV(r13s5, (r[1][3]-r[1][5]*r[3][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[3][5]))));
101 DIV(r23s5, (r[2][3]-r[2][5]*r[3][5]), sqrt((1.-SQR(r[2][5]))*(1.-SQR(r[3][5]))));
102
103 DIV(r15s34, (r15s4-r13s4*r53s4), sqrt((1.-SQR(r13s4))*(1.-SQR(r53s4))));
104 DIV(r25s34, (r25s4-r23s4*r53s4), sqrt((1.-SQR(r23s4))*(1.-SQR(r53s4))));
105 DIV(r14s5, (r[1][4]-r[1][5]*r[4][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[4][5]))));
106 DIV(r24s5, (r[2][4]-r[2][5]*r[4][5]), sqrt((1.-SQR(r[2][5]))*(1.-SQR(r[4][5]))));
107 DIV(r12s35, (r12s5-r13s5*r23s5), sqrt((1.-SQR(r13s5))*(1.-SQR(r23s5))));
108 DIV(r12s45, (r12s5-r14s5*r24s5), sqrt((1.-SQR(r14s5))*(1.-SQR(r24s5))));
109
110 DIV(r12s345, (r12s34-r15s34*r25s34), sqrt((1.-SQR(r15s34))*(1.-SQR(r25s34))));
111
112 out_r(" r12_5 = %f diff= %f\n", r12s5, r[1][2]-r12s5);
113 out_r(" r12_35 = %f diff= %f\n", r12s35, r[1][2]-r12s35);
114 out_r(" r12_45 = %f diff= %f\n\n", r12s45, r[1][2]-r12s45);
115 out_r(_("Three constant correlations:") );
116 out_r("\n r12_345 = %f diff= %f\n", r12s345, r[1][2]-r12s345);
117 }
118 }
119
120 df = nrow - 3;
121 t = r12s3/(sqrt(1.-SQR(r12s3))) * sqrt((REAL)df);
122 /* f = SQR(r12s3)/(1.-SQR(r12s3)) * (REAL)(nrow-2); */
123 alpha = get_t_int(fabs(t), df);
124
125 out_r( _("\nt-Test:\n"));
126 out_r(_("Hypothesis H0: r12_3 = 0 against hypothesis H1: r12_3 # 0 "
127 "(->two sided)\n") );
128 a = 1.0-alpha;
129 out_r(_("Probability of H0 = %6.4f\n\n"), a);
130 /*
131 out_r("F-Test:\n");
132 alpha = get_f_int(f, 1, df);
133 a = alpha;
134 out_r("Hypothese H0: r=0 gegen Hypothese H1: r#0 (->zweiseitig)\n");
135 out_r("Irrtumswahrscheinlichkeit alpha fuer H0 = %6.4f\n\n", a);
136 */
137 out_end();
138
139 }
140
141 /* =================================================================== */
142
143
144 void multiple_reg(REAL y[], PREAL x[], int nrow, int ncol) {
145 REAL *b, *resi, sdv, rquad, alpha, f_calc, reg, theo;
146 int i, j, rows;
147 FILE *rfile;
148 BOOLEAN hasmiss = FALSE;
149
150 b = (REAL*)m_calloc(ncol+1, sizeof(REAL));
151 if ((rquad=get_multiple_reg(y, x, nrow, ncol, b, &sdv, &f_calc))==REAL_MIN) {
152 return;
153 }
154
155 out_start();
156 colorize(ClHeader);
157 out_r(_("\nResults multiple linear regression:\n") );
158 colorize(ClDefault);
159 out_r(_("Regressed equation: y = B[0] + B[1]*x[1] + B[2]*x[2] +...+") );
160 out_r(" B[n]*x[n] \n\n");
161
162 for (i=0; i<(ncol+1); i++) {
163 out_r("B[%i] = %f\n", i, b[i]);
164 }
165 out_r("\n");
166
167
168 if (!noplot) {
169 if (ncol==2) {
170 plot_tripl(x[0], x[1], y, nrow, b[0], b[1], b[2],
171 get_label(x[0]), get_label(x[1]), get_label(y));
172 }
173
174 if (ncol==1) {
175 plot_pair(x[0], y, nrow, b[0], b[1], get_label(x[0]), get_label(y));
176 }
177 }
178
179 /* If there is any missing value, the residues will be in the wrong rows.
180 * To avoid this, we need to realloc cols before calculating "resi" */
181 rows= nrow;
182 for(j = 0; j < ncol; j++)
183 if(nn[acol[j]] != vn[acol[j]]){
184 alloc_cols((ncol + 1), 3);
185 rows = nn[acol[0]];
186 break;
187 }
188
189 resi = (REAL*)m_calloc(rows, sizeof(REAL));
190
191 for(j = 0; j < rows; j++){
192 theo = b[0];
193 for(i = 1; i <= ncol; i++){
194 if(xx[acol[i]][j] == SYSMIS){
195 hasmiss = TRUE;
196 break;
197 }
198 theo += b[i] * xx[acol[i]][j];
199 }
200 if(hasmiss){
201 resi[j] = SYSMIS;
202 hasmiss = FALSE;
203 } else
204 resi[j] = xx[acol[0]][j] - theo;
205 }
206
207 if ((j=col_exist("resi", FALSE)) == -1) {
208 if (!(make_new_col("resi", rows))) {
209 out_end();
210 return;
211 }
212 } else{
213 #ifndef STATIST_X
214 out_r(_("Column 'resi' updated!\n") );
215 #endif
216 }
217 if ((j=col_exist("resi", FALSE)) != -1) {
218 rfile = tmpptr[j];
219 } else{
220 return;
221 }
222 rewind(rfile);
223 FWRITE(resi, sizeof(REAL), rows, rfile);
224 rewind(rfile);
225
226 SQRT(reg, rquad);
227 out_r(_("Coefficient of determination r^2 = %f\n"), rquad);
228 out_r(_("Correlation coefficient r = %f\n"), reg);
229 out_r(_("Standard deviation s = %f\n"), sdv);
230 out_r(_("Number of data points n = %i\n"), nrow);
231
232 if (!equal(1.0, reg)) {
233 out_r(_("\nF-value = %f\n"), f_calc);
234 out_r(_("Degree of freedom f1 = %i\n"), ncol);
235 out_r(_("Degree of freedom f2 = %i\n"), nrow-ncol-1);
236
237 out_r(_("\nF-Test:\n"));
238 out_r(_("Hypothesis H0: r=0 against hypothesis H1: r#0 \n") );
239 alpha = get_f_int(f_calc, ncol, nrow-ncol-1);
240 if (reg < 0.0) {
241 alpha = 1.-alpha;
242 }
243 out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
244 }
245 else {
246 out_r(_("F-Test not possible since r = 1\n\n") );
247 }
248 out_end();
249 }
250
251 /* =================================================================== */
252
253
254
255 void cross_validate(REAL y[], PREAL x[], int nrow, int nc) {
256 char *pred_label;
257 REAL rquad_ori_pred, qquad, rquad, sdv, f_calc;
258 REAL *b, *ypred, *ypredm;
259 char analias[80];
260 int i, j, k;
261 BOOLEAN hasmiss = FALSE;
262
263
264 b = (REAL*) m_calloc(nc+1, sizeof(REAL));
265 ypred = (REAL*) m_calloc(nrow, sizeof(REAL));
266 if ((qquad=get_cross_validate(y, x, nrow, nc, ypred))==REAL_MIN) {
267 return;
268 }
269 if ((rquad=get_multiple_reg(y,x,nrow,nc,b,&sdv,&f_calc))==REAL_MIN) {
270 return;
271 }
272 if ((rquad_ori_pred=get_multiple_reg(ypred,&y,nrow,1,b,&sdv,&f_calc))==REAL_MIN) {
273 return;
274 }
275
276 if (!noplot) {
277 pred_label = m_calloc(1,strlen(get_label(y))+strlen(" vorhergesagt")+1);
278 strcpy(pred_label, get_label(y));
279 strcat(pred_label, _(" predicted") );
280 plot_pair(y, ypred, nrow, b[0], b[1], get_label(y), pred_label);
281 }
282
283 out_start();
284 out_r(_("Coefficient of determination r^2 = %f\n"), rquad);
285 out_r(_("r^2 of regression y versus y_pred = %f\n"), rquad_ori_pred);
286 out_r(_("Variance of prediction q^2 = %f\n"), qquad);
287
288 /* If we have missing values, the ypred values might be in the wrong rows.
289 * In this case, we need to insert the missing values in "ypred". */
290 ypredm = ypred;
291 for(i = 0; i <= nc; i++)
292 if(nn[acol[i]] != vn[acol[i]]){
293 hasmiss = TRUE;
294 nrow = nn[acol[0]];
295 nc++;
296 alloc_cols(nc, 3);
297 break;
298 }
299 if(hasmiss){
300 hasmiss = FALSE;
301 k = 0;
302 ypredm = (REAL*) m_calloc(nrow, sizeof(REAL));
303 for(i = 0; i < nrow; i++){
304 for(j = 0; j < nc; j++)
305 if(xx[acol[j]][i] == SYSMIS){
306 hasmiss = TRUE;
307 break;
308 }
309 if(hasmiss){
310 ypredm[i] = SYSMIS;
311 hasmiss = FALSE;
312 }
313 else{
314 ypredm[i] = ypred[k];
315 k++;
316 }
317 }
318 }
319
320 /* create new data record: */
321 strcpy(analias, "pred_");
322 strncat(analias, get_name(xx[acol[0]]), 79-strlen(analias));
323 if (!(make_new_col(analias, nrow))) {
324 out_end();
325 return;
326 }
327
328 FWRITE(ypredm, sizeof(REAL), nrow, tmpptr[ncol - 1]);
329 out_end();
330 }
331
332 /* ================================================================== */
333
334
335 void random_tupel(REAL y[], PREAL x[], int nrow, int nc, int ntup) {
336 int i, k, j;
337 BOOLEAN found;
338 TUPEL *tupel_list, atupel;
339 REAL rquad, sdv, f_calc;
340 REAL *yperm, *b, *ypred, qquad;
341 FILE *fp1, *fp2;
342
343 yperm = (REAL*) m_calloc(nrow, sizeof(REAL));
344 ypred = (REAL*) m_calloc(nrow, sizeof(REAL));
345 atupel.a = (unsigned short int*) m_calloc(nrow, sizeof(unsigned short int));
346 atupel.n = nrow;
347 tupel_list = (TUPEL*) m_calloc(ntup, sizeof(TUPEL));
348 b = (REAL*)m_calloc(nc+1, sizeof(REAL));
349
350 if ((rquad=get_multiple_reg(y, x, nrow, nc, b, &sdv, &f_calc))==REAL_MIN) {
351 return;
352 };
353 if ((qquad=get_cross_validate(y, x, nrow, nc, ypred))==REAL_MIN) {
354 return;
355 }
356 out_start();
357 out_r(_("\nOriginal y-Vector: r^%6.4f q^2=%6.4f\n\n"), rquad, qquad);
358
359 if(!(make_new_col("rquad", ntup))){
360 out_end();
361 return;
362 }
363 fp1 = tmpptr[ncol - 1];
364 if(!(make_new_col("qquad", ntup))){
365 out_end();
366 return;
367 }
368 fp2 = tmpptr[ncol - 1];
369
370 out_d(_("Starting with randomization of y-vector. Please be patient ...\n"));
371 i=0;
372 do {
373 create_tupel(&atupel, nrow);
374 /* print_tupel(atupel); */
375 found = FALSE;
376 for (k=0; k<i; k++) {
377 if (equal_tupel(atupel, tupel_list[k])) {
378 found = TRUE;
379 break;
380 }
381 }
382
383 if (!found) {
384 copy_tupel(&tupel_list[i], &atupel);
385 i++;
386 }
387 if (i%100==0) {
388 out_d(".");
389 fflush(stdout);
390 }
391 } while (i<ntup);
392 out_d("\n");
393
394 out_d(_("Calculating q^2 and r^2 of randomized vectors ...") );
395
396 #define _FINISH_RANDOM_TUPLE \
397 delete_last_columns(2); \
398 out_end();
399
400
401 for (i=0; i<ntup; i++) {
402 if (i%100==0) {
403 out_d(".");
404 fflush(stdout);
405 }
406 for (k=0; k<nrow; k++) {
407 j = tupel_list[i].a[k];
408 yperm[k] = y[j];
409 }
410 if ((rquad=get_multiple_reg(yperm, x, nrow, nc, b, &sdv, &f_calc))==REAL_MIN) {
411 _FINISH_RANDOM_TUPLE
412 return;
413 }
414 if ((qquad=get_cross_validate(yperm, x, nrow, nc, ypred))==REAL_MIN) {
415 _FINISH_RANDOM_TUPLE
416 return;
417 }
418 FWRITE(&rquad, sizeof(REAL), 1, fp1);
419 FWRITE(&qquad, sizeof(REAL), 1, fp2);
420 }
421 out_d(_("\ndone!\n\n") );
422
423 #undef _FINISH_RANDOM_TUPLE
424 }
425
426 /* ================================================================= */
427
428 void poly_reg(REAL x[], REAL y[], int n, int m) {
429
430 REAL a[2*MPOLY+1], e[MPOLY+2], q[MPOLY+1][MPOLY+2];
431
432 REAL br, cr, sr, tr, jor, fr, kr, pr, alpha;
433 int i, jo, t, s, f1, f2;
434
435 for (i=1; i<(2*m+1); i++) {
436 a[i] = 0.0;
437 }
438
439 for (i=0; i<(m+2); i++) {
440 e[i] = 0.0;
441 }
442
443 a[0] = (REAL) n;
444
445 for (i=0; i<n; i++) {
446 pr = 1.;
447 for (jo=1; jo<(2*m+1); jo++) {
448 pr *= x[i];
449 a[jo] += pr;
450 }
451 pr = y[i];
452 for (jo=0; jo<(m+1); jo++) {
453 e[jo] += pr;
454 pr *= x[i];
455 q[jo][m+1] = e[jo];
456 }
457 e[m+1] += SQR(y[i]);
458 }
459
460 for (i=0; i<(m+1); i++) {
461 for (jo=0; jo<(m+1); jo++) {
462 q[i][jo] = a[i+jo];
463 }
464 }
465
466 /* Begin of Gauss-Elimination */
467 for (s=0; s<(m+1); s++) {
468 t = s;
469 label1: ;
470 if (q[t][s] != 0.0) {
471 goto label2;
472 }
473 t ++;
474 if (t < m) {
475 goto label1;
476 }
477 out_err(MAT, ERR_FILE, ERR_LINE,
478 _("Gauss-Elimination: No possible solution!") );
479 return;
480 label2: ;
481 for (jo=0; jo<(m+2); jo++) {
482 br = q[s][jo];
483 q[s][jo] = q[t][jo];
484 q[t][jo] = br;
485 }
486 cr = 1./q[s][s];
487 for (jo=0; jo<(m+2); jo++) {
488 q[s][jo] *= cr ;
489 }
490 for (t=0; t<(m+1); t++) {
491 if (t == s) {
492 goto label3;
493 }
494 cr = -q[t][s];
495 for (jo=0; jo<(m+2); jo++) {
496 q[t][jo] += (cr * q[s][jo]);
497 }
498 label3: ;
499 }
500 }
501 /* End of Gauss-Elimination */
502
503 sr = 0.0;
504 for (i=1; i<(m+1); i++) {
505 sr += q[i][m+1] * (e[i]-a[i]*e[0]/(REAL)n);
506 }
507
508 tr = e[m+1] - e[0]*e[0]/(REAL)n;
509 cr = tr-sr;
510 f2 = n-m-1;
511 DIV(jor, sr, (REAL)m);
512 DIV(kr, cr, (REAL)f2);
513 f1 = m;
514 DIV(fr, jor, kr);
515 DIV(br, sr, tr);
516 SQRT(kr, br);
517 DIV(sr, cr, (REAL)f2);
518 SQRT(sr, sr);
519
520 out_start();
521 for (i=0; i<(m+1); i++) {
522 a[i] = q[i][m+1];
523 out_r(_("Coefficient a%i = %15e\n"), i, a[i]);
524
525 }
526
527 colorize(ClHeader);
528 out_r(_("\nResult polynomial regression:\n") );
529 colorize(ClDefault);
530 out_r(_("Regressed equation: y = a0 + a1*x + a2*x^2 +...+ an*x^n\n\n") );
531
532 if (!noplot)
533 plot_poly(x, y, n, a, m, get_label(x), get_label(y));
534
535 out_r(_("Coefficient of determination r^2 = %f\n"), br);
536 out_r(_("Correlation coefficient r = %f\n"), kr);
537 out_r(_("Standard deviation s = %f\n"), sr);
538
539 if (!equal(1.0, kr)) {
540 out_r(_("F-value = %f\n"), fr);
541 out_r(_("Degree of freedom f1 = %i\n"), f1);
542 out_r(_("Degree of freedom f2 = %i\n"), f2);
543
544 out_r(_("\nF-Test:\n"));
545 out_r(_("Hypothesis H0: r=0 against hypothesis H1: r>0 or r<0\n") );
546 alpha = get_f_int(fr, f1, f2);
547 if (kr < 0.0) {
548 alpha = 1.-alpha;
549 }
550 out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
551 }
552 else {
553 out_r(_("F-Test not possible since r = 1\n\n") );
554 }
555 out_end();
556 }
557
558 /* =================================================================== */
559
560
561 void correl_matrix(PREAL xx[], int nrow, int ncol) {
562 int i, k;
563 REAL *x, *y, x_sdv, y_sdv, s_xy;
564 /* PREAL *r; */
565 PREAL r[MCOL];
566 char label[10];
567
568 /* r = (PREAL*)m_calloc(ncol, sizeof(PREAL)); */
569 for (i=0; i<ncol; i++) {
570 r[i] = (REAL*) m_calloc(ncol, sizeof(REAL));
571 }
572
573 for (i=0; i<ncol; i++) {
574 r[i][i] = 1.0;
575 for (k=0; k<i; k++) {
576 x = xx[i];
577 y = xx[k];
578 s_xy = (get_xysum(x,y,nrow) - ((get_sum(x,nrow)*get_sum(y,nrow))/
579 (REAL) nrow))/ (REAL) (nrow-1);
580 x_sdv = get_sdv(x,nrow);
581 y_sdv = get_sdv(y,nrow);
582 DIV(r[i][k], s_xy, (x_sdv*y_sdv));
583 r[k][i] = r[i][k];
584 }
585 }
586
587 out_start();
588 for(i = 0; i < 10; i++)
589 label[i] = 0;
590 colorize(ClHeader);
591 out_r(_("Matrix of linear correlation of variables:\n\n") );
592 out_r(_("Variable ") );
593 for (i=0; i<ncol; i++) {
594 /* out_r("%8i", (i+1)); */
595 strncpy(label, get_name(xx[i]), 9);
596 if (strlen(label)>6) {
597 label[6] = '.';
598 label[7] = '\0';
599 }
600 out_r("%-8s", label);
601 }
602 colorize(ClDefault);
603 out_r("\n");
604 for (i=0; i<ncol; i++) {
605 out_r("--------");
606 }
607 out_r("------------\n");
608
609 for (i=0; i<ncol; i++) {
610 /* out_r(" %2i | ", (i+1)); */
611 strncpy(label, get_name(xx[i]), 9);
612 if (strlen(label)>6) {
613 label[6] = '.';
614 label[7] = '\0';
615 }
616 colorize(ClHeader);
617 out_r(" %-7s", label);
618 colorize(ClDefault);
619 out_r(" | ");
620 for (k=0; k<ncol; k++) {
621 out_r("%8.4f", r[i][k]);
622 }
623 out_r("\n");
624 }
625 out_r("\n");
626 out_end();
627 }
628
629
630 /* =================================================================== */
631
632 void rank_matrix(PREAL xx[], int nrow, int ncol) {
633 int i, k;
634 REAL *x, *y;
635 /* PREAL *r; */
636 PREAL r[MCOL];
637
638 char label[64];
639
640 /* r = (PREAL*)m_calloc(ncol, sizeof(PREAL)); */
641 for (i=0; i<ncol; i++) {
642 r[i] = (REAL*) m_calloc(ncol, sizeof(REAL));
643 }
644
645 for (i=0; i<ncol; i++) {
646 r[i][i] = 1.0;
647 for (k=0; k<i; k++) {
648 x = xx[i];
649 y = xx[k];
650 if ((r[i][k]=get_rank_correlation(x, y, nrow))==REAL_MIN) {
651 return;
652 }
653 r[k][i] = r[i][k];
654 }
655 }
656
657 out_start();
658 colorize(ClHeader);
659 out_r(_("Matrix of SPEARMAN'S Rank-Correlation-Coefficients\n") );
660 out_r(_("of the variables:\n\n") );
661 out_r(_("Variable ") );
662 for (i=0; i<ncol; i++) {
663 /* out_r("%8i", (i+1)); */
664 strncpy(label, get_name(xx[i]), 9);
665 if (strlen(label)>6) {
666 label[6] = '.';
667 label[7] = '\0';
668 }
669 out_r("%-8s", label);
670 }
671 colorize(ClDefault);
672 out_r("\n");
673 for (i=0; i<ncol; i++) {
674 out_r("--------");
675 }
676 out_r("------------\n");
677
678 for (i=0; i<ncol; i++) {
679 /* out_r(" %2i | ", (i+1)); */
680 strncpy(label, get_name(xx[i]), 9);
681 if (strlen(label)>6) {
682 label[6] = '.';
683 label[7] = '\0';
684 }
685 colorize(ClHeader);
686 out_r(" %-7s", label);
687 colorize(ClDefault);
688 out_r(" | ");
689 for (k=0; k<ncol; k++) {
690 out_r("%8.4f", r[i][k]);
691 }
692 out_r("\n");
693 }
694 out_r("\n");
695 out_end();
696 }
697
698
699 /* out_r("Variable "); */
700 /* for (i=0; i<ncol; i++) { */
701 /* out_r("%8i", (i+1)); */
702 /* } */
703 /* out_r("\n"); */
704 /* for (i=0; i<ncol; i++) { */
705 /* out_r("--------"); */
706 /* } */
707 /* out_r("------------\n"); */
708
709 /* for (i=0; i<ncol; i++) { */
710 /* out_r(" %2i | ", (i+1)); */
711 /* for (k=0; k<ncol; k++) { */
712 /* out_r("%8.4f", r[i][k]); */
713 /* } */
714 /* out_r("\n"); */
715 /* } */
716 /* out_r("\n"); */
717 /* } */
718
719
720 /* =================================================================== */
721
722 void probit(REAL dose[], REAL num[], REAL effect[], int n) {
723 REAL propcalc, chisq, ycalc;
724 REAL *aprobit, *logx, neq, yh, w, oc;
725 int i, np, offset, ind, old, nerr = 0;
726 BOOLEAN positiv, equal_num, equal_dose, *infinity, show_errors = TRUE;
727 REAL x_mean, x_sdv, y_mean, y_sdv;
728 REAL s_xy, r_xy, a1, a0, ed50, ed90, alpha, tau, arg;
729 REAL nw_sum, nwx_sum, nwxsqr_sum, s_m, conf_l, conf_u, mq_m;
730
731 REAL dose0, dose1;
732
733 aprobit = (REAL*)m_calloc(n, sizeof(REAL));
734 logx = (REAL*)m_calloc(n, sizeof(REAL));
735 infinity = (BOOLEAN*)m_calloc(n, sizeof(BOOLEAN));
736
737 np = 0;
738
739 out_start();
740 for (i=0; i<n; i++) {
741 yh = effect[i]/num[i];
742 if (yh < 0.50) {
743 positiv = TRUE;
744 yh = 1. - yh;
745 }
746 else {
747 positiv = FALSE;
748 }
749
750 if (yh >= 1.0) {
751 infinity[i] = TRUE;
752 nerr++;
753 if(nerr == 30){
754 out_d("\n\n");
755 out_i(_("Continue showing these math warnings? (%s) "), _("y/N"));
756 GETNLINE;
757 if(!(empty) && (line[0] != _("y")[0] || line[0] != _("Y")[0])){
758 show_errors = FALSE;
759 }
760 }
761 if(show_errors){
762 out_err(MWA, ERR_FILE, ERR_LINE,
763 _("Can not calculate probit: dose %g count %g effect %g"),
764 dose[i], num[i], effect[i]);
765 }
766 }
767 else {
768 infinity[i] = FALSE;
769
770 neq = 0.;
771 if (!equal(yh, 0.5)) {
772 neq = get_z(yh);
773 }
774
775 if (positiv) {
776 neq = -1. * neq;
777 }
778 aprobit[np] = neq + 5.;
779
780 if (dose[i] <= 0.0) {
781 out_err(MAT, ERR_FILE, ERR_LINE, _("Dose %i <= 0!\n"), i);
782 out_end();
783 return;
784 }
785
786 logx[np] = log10(dose[i]);
787 out_r(_("dose=%6g num=%g effect=%2f prop=%4.2f probit=%5.2f\n"),
788 dose[i], num[i], effect[i], (effect[i]/num[i]), aprobit[np]);
789 np++;
790 }
791 if(show_errors){
792 if((np + nerr + 1) % 12 == 0)
793 mywait();
794 } else
795 if((np + 1) % 12 == 0)
796 mywait();
797 }
798
799
800 if (np < 2) {
801 out_err(MAT, ERR_FILE, ERR_LINE,
802 _("Less than 2 valid dose-effect pairs!") );
803 out_end();
804 return;
805 }
806
807
808 /* Test for equal probabilities of effects and equal doses, respectively */
809 equal_num = TRUE;
810 equal_dose = TRUE;
811 i=0;
812 while(infinity[i]) {
813 i++;
814 }
815 old=i;
816 for (ind=0; ind<np; ind++) {
817 while(infinity[i]) {
818 i++;
819 }
820 if ( (ind>0) && ((effect[i]/num[i])!=(effect[old]/num[old])) ) {
821 equal_num = FALSE;
822 }
823 if ( (ind>0) && (dose[i] != dose[old]) ) {
824 equal_dose = FALSE;
825 }
826 old=i;
827 i++;
828 }
829
830 if (equal_num) {
831 out_err(MAT, ERR_FILE, ERR_LINE, _("All effect probabilities are equal!"));
832 out_end();
833 return;
834 }
835
836 if (equal_dose) {
837 out_err(MAT, ERR_FILE, ERR_LINE, _("All doses are equal!") );
838 out_end();
839 return;
840 }
841
842
843 mywait();
844
845 x_mean = get_mean(logx,np);
846 y_mean = get_mean(aprobit,np);
847 x_sdv = get_sdv(logx,np);
848 y_sdv = get_sdv(aprobit,np);
849 s_xy = (get_xysum(logx,aprobit,np) - ((get_sum(logx,np)*get_sum(aprobit,np))/
850 (REAL) np))/(REAL) (np-1);
851 r_xy = s_xy/(x_sdv*y_sdv);
852 /* rr = r_xy * r_xy; */
853 a1 = r_xy * (y_sdv/x_sdv);
854 a0 = y_mean - a1*x_mean;
855 DIV(arg, SQR(r_xy), (1.-SQR(r_xy)));
856 SQRT(tau, ((np-2.)*arg));
857
858 chisq = 0.;
859 nw_sum = 0.0;
860 nwx_sum = 0.0;
861 nwxsqr_sum = 0.0;
862 offset = 0;
863
864 for (i=0; i<np; i++) {
865 if (infinity[i]) {
866 offset++;
867 }
868 ind = i+offset;
869 ycalc = a0 + a1*logx[i];
870 neq = ycalc-5;
871 if (neq > 0.) {
872 positiv = TRUE;
873 neq = neq * (-1.);
874 }
875 else {
876 positiv = FALSE;
877 }
878
879 propcalc = get_norm_int(neq);
880 if (positiv) {
881 propcalc = 1. - propcalc;
882 }
883 w = SQR(get_norm_ord(ycalc-5.))/(propcalc*(1.-propcalc));
884 /* printf("%g %g %g\n", logx[i], ycalc-5, w); */
885 nw_sum += num[ind]*w;
886 nwx_sum += num[ind]*w*logx[i];
887 nwxsqr_sum += num[ind]*w*SQR(logx[i]);
888 oc = SQR(effect[ind] - (num[ind]*propcalc))/
889 (num[ind]*propcalc*(1.-propcalc));
890 chisq += oc;
891 /*
892 out_r("i=%i ind=%i num=%f effect=%f y=%f P=%f w=%f oc=%f logx=%g\n",
893 i, ind, num[ind], effect[ind], ycalc, propcalc, w, oc, logx[i]);
894 */
895 }
896
897
898 /*
899 out_r("np = %i\n", np);
900 out_r("nwxsqr_sum = %g\n", nwxsqr_sum);
901 out_r("nwx_sum = %g\n", nwx_sum);
902 out_r("nw_sum = %g\n", nw_sum);
903 */
904
905 nwxsqr_sum = nwxsqr_sum - ( SQR(nwx_sum)/nw_sum );
906 x_mean = nwx_sum/nw_sum;
907 a0 = y_mean - a1*x_mean;
908 ed50 = (5.0-a0)/a1;
909 mq_m = 1./SQR(a1) * (1./nw_sum + SQR(ed50-x_mean)/nwxsqr_sum);
910 s_m = sqrt(mq_m);
911 conf_l = ed50-(1.96*s_m);
912 conf_u = ed50+(1.96*s_m);
913 ed90 = (6.28-a0)/a1;
914
915 /*
916 out_r("nwxsqr = %g\n", nwxsqr_sum);
917 out_r("x_mean = %g\n", x_mean);
918 out_r("mq_m = %g\n", mq_m);
919 out_r("s_m = %g\n", s_m);
920 */
921
922 colorize(ClHeader);
923 out_r(_("Result probit analysis:\n") );
924 colorize(ClDefault);
925 if (a1<0.0) {
926 out_err(MWA, ERR_FILE, ERR_LINE,
927 _("Inverse probit curve due to negative slope a1!!!") );
928 }
929 out_r("ED50 = %g\n", pow(10., ed50));
930 out_r(_("Confidence range (95%%) for ED50: [%g, %g]\n"),
931 pow(10., conf_l), pow(10., conf_u));
932 /*
933 out_r("nw_sum=%f nwx_sum=%f x_mean=%f s_m=%f\n",
934 nw_sum, nwx_sum, x_mean, s_m);
935 */
936 out_r("ED90 = %g\n", pow(10., ed90));
937 out_r("a0 = %g\n", a0);
938 out_r("a1 = %g\n", a1);
939 out_r( _("Chi^2= %g\n"), chisq);
940 out_r(_("Degrees of freedom = %i\n"),(np-2));
941 out_r(_("Correlation coefficient r = %f\n"), r_xy);
942 out_r(_("Check value Tau = %f\n"), tau);
943 if (tau>0.0) {
944 out_r(_("\nt-Test with check value Tau:\n") );
945 out_r(_("Hypothesis H0: Correlation according to probit-model exists\n") );
946 alpha = get_t_int(tau, (np-2));
947 out_r(_("Probability of H0: %f\n"), alpha);
948 }
949 else {
950 out_r(_("t-Test with Tau not possible since Tau = 0\n") );
951 }
952 out_r(_("\nChi^2-test:\n") );
953 out_r(_("Hypothesis H0: r=0 vs. H1: r#0\n") );
954 alpha = get_chi_int(chisq, (np-2));
955 out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
956
957 if (!noplot) {
958 dose0 = pow(10., ((2.1-a0)/a1)); /* ~ 0% Probability */
959 dose1 = pow(10., ((8.5-a0)/a1)); /* ~ 100 % Probability */
960 #ifndef STATIST_X
961 out_r( _("doselab=|%s|, effectlab=|%s|\n"),
962 get_name(dose), get_name(effect));
963 #endif
964 plot_probit(dose, num, effect, n, a0, a1, dose0, dose1,
965 get_label(dose), get_label(effect));
966 }
967 out_end();
968
969 }
970
971 /* =================================================================== */
972
973
974 void kolmo_test(REAL x[], int n) {
975 REAL mean, sdv;
976 int i, k, alpha;
977 REAL d, norm0, *cdf, *z, diff, fn1 = 0, fn2 = 0, z0 = 0;
978
979 mean = get_mean(x,n);
980 sdv = get_sdv(x,n);
981
982
983 z = (REAL*)m_calloc(n, sizeof(REAL));
984 cdf = (REAL*)m_calloc(n, sizeof(REAL));
985 for (i=0; i<n; i++) {
986 DIV(z[i], (x[i]-mean), sdv);
987 cdf[i] = (REAL)(i+1)/(REAL)n;
988 }
989
990 qsort(z, n, sizeof(REAL), real_compar_up);
991
992 d = 0.0;
993 for (i=0; i<n; i++) {
994 norm0 = get_norm_int(z[i]);
995
996 k = i+1;
997 do {
998 k--;
999 diff = fabs(cdf[k]-norm0);
1000 /* printf("z=%g cdf=%g norm=%g d=%g\n", z[k], cdf[k], norm0, diff); */
1001 if (diff > d) {
1002 d = diff;
1003 fn1 = cdf[k];
1004 fn2 = norm0;
1005 z0 = z[i];
1006 }
1007 }
1008 while (equal(z[k], z[i]));
1009 }
1010
1011 if (!noplot) {
1012 plot_cdf_ks(z, n, z0, fn1, fn2, x, get_label(x));
1013 }
1014
1015 alpha = pks(d, n);
1016
1017 /* prob=pks(sqrt( SQR((REAL)n) / ((REAL)(2*n)))*(d)); */
1018
1019 out_start();
1020 out_r(_("Hypothesis H0: Data is normally distributed versus\n") );
1021 out_r(_("Hypothesis H1: Data is not normally distributed\n\n") );
1022
1023 colorize(ClHeader);
1024 out_r(_("Result KS-Lilliefors-Test on normal distribution:\n") );
1025 colorize(ClDefault);
1026 out_r(_("D (calculated)= %f\n"), d);
1027 out_r(_("Number of data = %i\n"), n);
1028
1029 out_r(_("Mean = %g\n"), mean);
1030 out_r(_("Standard deviation = %g\n"), sdv);
1031
1032 if (alpha > 0) {
1033 out_r(_("H0 accepted with a significance level of %i%%\n"), alpha);
1034 }
1035 else {
1036 out_r(_("H0 rejected\n") );
1037 }
1038 out_end();
1039 }
1040
1041 void percentiles(REAL x[], int n) {
1042 REAL mean, sdv;
1043 int i, index;
1044 REAL p, *z, alpha = 0;
1045
1046 mean = get_mean(x,n);
1047 sdv = get_sdv(x,n);
1048
1049
1050 z = (REAL*)m_calloc(n, sizeof(REAL));
1051 for (i=0; i<n; i++) {
1052 z[i] = x[i];
1053 }
1054
1055 qsort(z, n, sizeof(REAL), real_compar_up);
1056
1057 if (!noplot) {
1058 plot_cdf(z, n, get_label(x));
1059 }
1060
1061 out_start();
1062 out_r(_("Percentiles for column \"%s\"\n"), get_label(x));
1063 alpha = 0;
1064 for(i = 0; i < 9; i++) {
1065 alpha += 0.1;
1066 index = (int)(ceil((REAL)n * alpha));
1067 p = z[index-1];
1068 out_r("%i%%\t%g\n", (int)(alpha*100.5), p);
1069 }
1070 alpha = 0.95;
1071 index = (int)(ceil((REAL)n * alpha));
1072 p = z[index-1];
1073 out_r("%d%%\t%g\n", (int)(alpha*100.0), p);
1074 alpha = 1.0;
1075 index = (int)(ceil((REAL)n * alpha));
1076 p = z[index-1];
1077 out_r("%d%%\t%g\n\n", (int)(alpha*100.0), p);
1078
1079 out_end();
1080 }
1081
1082
1083 /* ===================================================================
1084
1085 void compare_dist(REAL x[], int nx, REAL y[], int ny) {
1086 REAL d, prob, alpha;
1087
1088 ks(x, nx, y, ny, &d, &prob);
1089
1090 out_start();
1091 out_r("Hypothese H0: x und y sind gleich verteilt gegen \n");
1092 out_r("Hypothese H1: x und y sind nicht gleich verteilt \n\n");
1093
1094 out_r("Ergebnis Kolmogoroff-Smirnoff-Test auf Normalverteilung:\n");
1095 out_r("D (berechnet)= %f\n", d);
1096 out_r("Anzahl der x-Werte = %i\n", nx);
1097 out_r("Anzahl der y-Werte = %i\n", ny);
1098
1099 if (prob == 0.0) {
1100 alpha = 0.0;
1101 }
1102 else {
1103 alpha = 1.0 - prob;
1104 }
1105 out_r("Irrtumswahrscheinlichkeit alpha fuer H0 = %f\n\n", alpha);
1106 out_end();
1107 }
1108
1109 =================================================================== */
1110
1111
1112 void equal_freq(REAL x[], int n) {
1113 int *y, class[MCLASS], freq[MCLASS];
1114 int nclass, i, k, df;
1115 REAL chisq, phi, alpha, chi;
1116 BOOLEAN found;
1117
1118 nclass = 0;
1119 y = (int*) m_calloc(n, sizeof(int));
1120 for (i=0; i<n; i++) {
1121 y[i] = get_round(x[i]);
1122 }
1123
1124 for (i=0; i<n; i++) {
1125 found = FALSE;
1126 for (k=0; k<nclass; k++) {
1127 if (y[i]==class[k]) {
1128 found = TRUE;
1129 freq[k] ++;
1130 break;
1131 }
1132 }
1133 if (!found) {
1134 class[nclass] = y[i];
1135 freq[nclass] = 1;
1136 nclass ++;
1137 }
1138 if(nclass > MCLASS){
1139 out_err(MAT, ERR_FILE, ERR_LINE,
1140 _("Test aborted: There are more than %i classes of frequency."),
1141 MCLASS);
1142 return;
1143 }
1144 }
1145
1146 out_start();
1147 for (k=0; k<nclass; k++) {
1148 if (freq[k]<=5) {
1149 out_r(_("Warning: This test shouldn't be applied,\n"
1150 "\tsince there are frequencies <= 5!\n\n") );
1151 break;
1152 }
1153 }
1154
1155
1156 chisq = 0;
1157 phi = (REAL)n/(REAL)nclass;
1158
1159 if ( (nclass==2) && (n<200) ) {
1160 out_r(_("Correction according to YATES applied, since just 2 classes and n<200\n\n") );
1161 if (n<25) {
1162 out_r(_("Warning: FISCHER-Test shouldn't be applied,\n\tsince number of values <25\n\n") );
1163 }
1164 for (k=0; k<nclass; k++) {
1165 /* chisq += SQR(fabs(freq[k]-phi)-0.5)/phi; */
1166 DIV(chi, SQR(fabs(freq[k]-phi)-0.5), phi);
1167 chisq += chi;
1168 }
1169 }
1170 else {
1171 for (k=0; k<nclass; k++) {
1172 /* chisq += SQR( (REAL)freq[k] - phi)/phi; */
1173 DIV(chi, SQR( (REAL)freq[k] - phi), phi);
1174 chisq += chi;
1175 }
1176 }
1177
1178 df = nclass -1;
1179
1180 colorize(ClHeader);
1181 out_r(_("Result Chi^2-Test of equal frequency:\n") );
1182 colorize(ClDefault);
1183 out_r(_("Hypothesis H0: Values have equal frequency\n") );
1184 out_r(_("Hypothesis H1: Values don't have equal frequencies\n\n") );
1185
1186 if (df >= 1) {
1187 out_r( _("Chi^2 = %f\n"), chisq);
1188 out_r(_("Degrees of freedom = %i\n"), df);
1189 alpha = 1. - get_chi_int(chisq, df);
1190 out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
1191 }
1192 else {
1193 out_r(_("Chi^2-Test of normal distribution not possible since degrees of freedom < 1!\n\n") );
1194 }
1195 out_end();
1196 }
1197
1198 /* ========================================================================== */
1199
1200
1201 void compare_freq(REAL x[], int nx, REAL y[], int ny) {
1202 typedef struct {
1203 int class, x, y;
1204 } FREQUENCIES;
1205
1206 int *ix, *iy;
1207 int nclass, i, k, df;
1208 REAL chisq, phi, alpha, chi;
1209 BOOLEAN found;
1210 FREQUENCIES freq[MCLASS];
1211
1212 nclass = 0;
1213 for (i=0; i<MCLASS; i++) {
1214 freq[i].x = 0;
1215 freq[i].y = 0;
1216 }
1217
1218 ix = (int*) m_calloc(nx, sizeof(int));
1219 for (i=0; i<nx; i++) {
1220 ix[i] = get_round(x[i]);
1221 }
1222
1223 for (i=0; i<nx; i++) {
1224 found = FALSE;
1225 for (k=0; k<nclass; k++) {
1226 if (ix[i]==freq[k].class) {
1227 found = TRUE;
1228 freq[k].x ++;
1229 break;
1230 }
1231 }
1232 if (!found) {
1233 freq[nclass].class = ix[i];
1234 freq[nclass].x = 1;
1235 nclass ++;
1236 }
1237 }
1238
1239
1240 iy = (int*) m_calloc(ny, sizeof(int));
1241 for (i=0; i<ny; i++) {
1242 iy[i] = get_round(y[i]);
1243 }
1244
1245 for (i=0; i<ny; i++) {
1246 found = FALSE;
1247 for (k=0; k<nclass; k++) {
1248 if (iy[i]==freq[k].class) {
1249 found = TRUE;
1250 freq[k].y ++;
1251 break;
1252 }
1253 }
1254 if (!found) {
1255 freq[nclass].class = iy[i];
1256 freq[nclass].y = 1;
1257 nclass ++;
1258 }
1259 if(nclass > MCLASS){
1260 out_err(MAT, ERR_FILE, ERR_LINE,
1261 _("Test aborted: There are more than %i classes of frequency."),
1262 MCLASS);
1263 return;
1264 }
1265 }
1266
1267 out_start();
1268 for (k=0; k<nclass; k++) {
1269 if (freq[k].x<=5) {
1270 out_r(_("Warning: This test shouldn't be applied,\n"
1271 "\tsince there are frequencies <= 5!\n\n") );
1272 break;
1273 }
1274 }
1275
1276 chisq = 0;
1277
1278 if ( (nclass==2) && (nx<200) ) {
1279 out_r(_("Correction according to YATES applied, since just 2 classes and n<200\n\n") );
1280 if (nx<25) {
1281 out_r(_("Warning: FISCHER-Test shouldn't be applied,\n\tsince number of values <25\n\n") );
1282 }
1283 for (k=0; k<nclass; k++) {
1284 phi = ((REAL)freq[k].y/(REAL)ny)*(REAL)nx;
1285 /* chisq += SQR(fabs(freq[k].x-phi)-0.5)/phi; */
1286 DIV(chi, SQR(fabs(freq[k].x-phi)-0.5), phi);
1287 chisq += chi;
1288 }
1289 }
1290 else {
1291 for (k=0; k<nclass; k++) {
1292 phi = ((REAL)freq[k].y/(REAL)ny)*(REAL)nx;
1293 /* chisq += SQR( (REAL)freq[k].x - phi)/phi; */
1294 DIV(chi, SQR( (REAL)freq[k].x - phi), phi);
1295 chisq += chi;
1296 }
1297 }
1298
1299 df = nclass -1;
1300
1301 colorize(ClHeader);
1302 out_r(_("Result Chi^2-Test of equal frequency:\n") );
1303 colorize(ClDefault);
1304 out_r(_("Hypothesis H0: x and y are equally distributed\n") );
1305 out_r(_("Hypothesis H1: x and y are not equally distributed\n") );
1306
1307 if (df >= 1) {
1308 out_r( _("Chi^2 = %f\n"), chisq);
1309 out_r(_("Degrees of freedom: %i\n"), df);
1310 alpha = 1. - get_chi_int(chisq, df);
1311 out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
1312 }
1313 else {
1314 out_r(_("Chi^2-Test of normal distribution not possible since degrees of freedom < 1!\n\n") );
1315 }
1316 out_end();
1317 }
1318
1319 /* ========================================================================== */
1320
1321
1322
1323 void lin_reg(REAL x[], REAL y[], int n) {
1324 REAL x_mean, x_sdv, y_mean, y_sdv;
1325 REAL s_xy, r_xy, a1, a0, rr, t, alpha;
1326 /* REAL sse, qysum; */
1327 int df;
1328
1329 x_mean = get_mean(x,n);
1330 y_mean = get_mean(y,n);
1331 x_sdv = get_sdv(x,n);
1332 y_sdv = get_sdv(y,n);
1333 s_xy = (get_xysum(x,y,n) - ((get_sum(x,n)*get_sum(y,n))/(REAL) n))/
1334 (REAL) (n-1);
1335 DIV(r_xy, s_xy, x_sdv*y_sdv);
1336 rr = r_xy * r_xy;
1337 a1 = r_xy * (y_sdv/x_sdv);
1338 a0 = y_mean - a1*x_mean;
1339
1340 df = n - 2;
1341 /* z = r_xy * sqrt((REAL)n-1.); */
1342 /*
1343 sse = 0.0;
1344 qysum = 0.0;
1345 for (i=0; i<n; i++) {
1346 sse += SQR(y[i] - (a0 + a1*x[i]));
1347 qysum += SQR(y[i] - y_mean);
1348 }
1349 */
1350 out_start();
1351 colorize(ClHeader);
1352 out_r(_("\nResults of linear regression:\n") );
1353 colorize(ClDefault);
1354 out_r(_("number of data points n : %i \n"),n);
1355 out_r(_("Intersection a0 : %g \n"),a0);
1356 out_r(_("Slope a1 : %g \n"),a1);
1357 out_r(_("Correlation coefficient r : %g \n"),r_xy);
1358 out_r(_("Coefficient of determination r^2 : %g \n"),rr);
1359 out_r(_("Degr. of freedom df = n-2 : %i \n"), df);
1360 /* out_r("qysum=%f6.4, sse=%6.4f, r^2=%6.4f\n", qysum, sse, 1-sse/qysum); */
1361 if (fabs(r_xy) < 0.999999999) {
1362 t = r_xy * sqrt(((REAL)n-2.)/(1.-SQR(r_xy)));
1363 out_r(_("Estimated t-value : %f\n"), t);
1364 alpha = get_t_int(fabs(t), df);
1365 out_r(_("\nt-Test:\n") );
1366 out_r(_("Hypothesis H0: r = 0 against hypothesis H1: r1 # 0 (->two-sided)\n") );
1367 /* a = alpha; */
1368 out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
1369 } else{
1370 out_r(_("t-Test not possible since |r| = 1!\n") );
1371 }
1372
1373 if (!noplot) {
1374 plot_pair(x, y, n, a0, a1, get_label(x), get_label(y));
1375 }
1376
1377 out_r("\n");
1378 out_end();
1379 }
1380
1381
1382
1383 /* =================================================================== */
1384
1385 void point_biserial_reg(REAL x[], REAL y[], int n) {
1386 REAL m_y0, m_y1, ysdv, *y0, *y1;
1387 REAL r_pb, rr, t, alpha;
1388 int *category;
1389 int i, n0=0, n1=0, df;
1390
1391 y0 = (REAL*)m_calloc(n, sizeof(REAL));
1392 y1 = (REAL*)m_calloc(n, sizeof(REAL));
1393 category = (int*)m_calloc(n, sizeof(int));
1394
1395 for (i=0; i<n; i++) {
1396 category[i] = get_round(x[i]);
1397 if ((category[i] != TRUE) && (category[i] != FALSE)) {
1398 out_err(ERR, ERR_FILE, ERR_LINE,
1399 _("First column must contain only 0's and 1's!"));
1400 return;
1401 }
1402 if (category[i]) {
1403 y1[n1] = y[i];
1404 n1++;
1405 }
1406 else {
1407 y0[n0] = y[i];
1408 n0++;
1409 }
1410 }
1411
1412 m_y0 = get_mean(y0, n0);
1413 m_y1 = get_mean(y1, n1);
1414
1415 ysdv = get_sdv(y, n);
1416 DIV(r_pb, (m_y1-m_y0), ysdv);
1417 r_pb *= sqrt((REAL)(n1*n0)/(n*(n-1)));
1418 /* r_pb = (m_y1-m_y0)/ysdv*sqrt((REAL)(n1*n0)/(n*(n-1))); */
1419
1420 rr = r_pb*r_pb;
1421 df = n - 2;
1422
1423 out_start();
1424 colorize(ClHeader);
1425 out_r(_("\nResult point biserial correlation:\n") );
1426 colorize(ClDefault);
1427 out_r(_("Number of data points n : %i \n"), n);
1428 out_r(_("Correlation coefficient r: %20.12e \n"), r_pb);
1429 out_r(_("Coefficient of determination r^2 : %20.12e \n"), rr);
1430 out_r(_("Degrees of freedom df = n-2 : %i \n"), df);
1431
1432 if (fabs(r_pb) < 1.) {
1433 t = r_pb * sqrt(((REAL)n-2.)/(1.-SQR(r_pb)));
1434 /* f = SQR(r_pb)/(1.-SQR(r_pb)) * (REAL)(n-2); */
1435 out_r(_("Calculated t-value : %f \n"), t);
1436 alpha = get_t_int(fabs(t), df);
1437 out_r(_("\nt-Test:\n") );
1438 out_r(_("Hypothesis H0: r = 0 versus hypothesis H1: r1 # 0 (->two-sided)\n") );
1439 /* a = alpha; */
1440 out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
1441 } else{
1442 out_r(_("t-Test not possible since |r| = 1!\n") );
1443 }
1444
1445 out_r("\n");
1446 out_end();
1447 }
1448
1449
1450 /* =================================================================== */
1451
1452
1453 void histogram(REAL x[], int n, int mclass, REAL min, REAL max) {
1454 /* calculates the classes for a histrogram
1455 calls: plot_hist() or plot_histo_discrete() with it*/
1456
1457 int aclass[MCLASS]; /* counter for the values in class [i] */
1458 REAL classval[MCLASS], fac, fac_short,arg, step;
1459 BOOLEAN zeroclass = FALSE; /* true, if a class remains empty */
1460 BOOLEAN autoclass; /* whether autoclass is selected or not */
1461 BOOLEAN hit_max_mark; /* TRUE, when a point hits the max mark */
1462 BOOLEAN discrete=FALSE; /* if we have only points, hitting the marks */
1463 int lower=0,higher=0; /*counts, how many values are not counted in aclass*/
1464 int interval; /* width of one class */
1465 int i,ii; /* loop variables */
1466
1467 autoclass=(mclass==0); /* check for autoclass */
1468
1469 if (autoclass) { /* autoclass was choosen*/
1470 /* start with a good approximation
1471 of the number of classes. Adjust that,
1472 as long, as we get empty classes */
1473 mclass = get_round(1. + 3.32*log10((REAL)n));
1474 }
1475
1476
1477 do {
1478 for (i=0; i<mclass; i++) {/* clear class counters */
1479 aclass[i] = 0;
1480 }
1481 lower=higher=0; /* reset counters */
1482 hit_max_mark=FALSE; /* resetting */
1483
1484 fac = (max-min)/(REAL)(mclass); /* size of each intervall*/
1485 for (i=0; i<n; i++) {
1486 if(x[i]<min) { lower++; continue; } /* count lower values */
1487
1488 arg = (x[i]-min)/fac;
1489 interval = (int) arg;
1490 /* see if the value lower or equal to max
1491 inc. aclass[intervall] or higher accordingly*/
1492 if(interval<mclass)
1493 {
1494 aclass[interval]++;
1495 }else
1496 { /* We have to include the right border for
1497 cases with pure integers */
1498 if(x[i]<=max)
1499 {
1500 aclass[mclass-1]++;
1501 /* if a value hits the last point, there is
1502 a chance that we are discrete */
1503 hit_max_mark=TRUE;
1504 }
1505 else
1506 higher++;
1507 }
1508 }
1509 /* if we are in automatic class selection
1510 check for for classes, which are empty
1511 with the current number of classes */
1512 if(autoclass)
1513 {
1514 zeroclass = FALSE;
1515 for (i=0; i<mclass; i++) {
1516 if (aclass[i] == 0) {
1517 zeroclass = TRUE;
1518 mclass --;
1519 break;
1520
1521 }
1522 }
1523 }
1524 } while (autoclass && zeroclass);
1525
1526 /* test if we are discrete, when a point has
1527 hit the max mark with the last mclass number*/
1528 if(hit_max_mark)
1529 {
1530 REAL help;
1531 discrete=TRUE;
1532 fac_short=(max-min)/(REAL)(mclass-1);
1533
1534 for(ii=0; ii<n;ii++)
1535 { help=(x[ii]-min)/fac_short;
1536 if( help!=(int)(help))
1537 { discrete=FALSE; break; }
1538 }
1539 }
1540
1541 /* we have aclass[] properly filled*/
1542 out_start();
1543 if(discrete)
1544 {
1545 out_d(_("Discrete data with exactly %d classes!\n"), mclass);
1546 step = (max-min)/(REAL)(mclass-1);
1547 for (i=0; i<mclass; i++) {
1548 classval[i] = min + (step*(REAL)i);
1549 }
1550 } else
1551 {
1552 step = (max-min)/(REAL)(mclass);
1553 for (i=0; i<mclass; i++) {
1554 classval[i] = min + (step/2.) + (step*(REAL)i);
1555 }
1556 }
1557
1558 if ((!thist) && (!noplot)){
1559 if(discrete)
1560 plot_histo_discrete(classval, aclass, mclass, step, x, get_label(x));
1561 else
1562 plot_histo(classval, aclass, mclass, step, x, get_label(x));
1563 }
1564 else {
1565 if( (!bernhard)|| ( bernhard && thist ))
1566 {
1567 out_r(_("\nHistogram (class center - number of values)\n") );
1568 print_histo(classval, aclass, mclass);
1569 }
1570 }
1571 out_end();
1572 } /* end of histogram() */
1573
1574
1575
1576 void standard(REAL x[], int n, int mclass, REAL min, REAL max) {
1577 REAL mean, sdv, sdv_mean, var_coef, median;
1578 REAL *xh, t, conf;
1579 REAL q_l, q_u, index, no_u, no_l;
1580 int i, i_l, i_u;
1581
1582 int xcrit;
1583
1584 /* critical values for 95% confidence intervals of the Medians (from NEAVE):*/
1585 int critical[50] = {
1586 -1, -1, -1, -1, -1, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3,
1587 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11,
1588 11, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17, 17
1589 };
1590 REAL med_conf_lo, med_conf_up;
1591
1592 /* if (mclass==0) {
1593 min = get_min(x,n);
1594 max = get_max(x,n);
1595 }*/
1596
1597
1598 out_start();
1599 if (max==min) {
1600 if(!bernhard) {
1601 out_err(MAT, ERR_FILE, ERR_LINE, _("All values are equal!") );
1602 } else
1603 {
1604 colorize(ClHeader);
1605 out_r(_("#Result general statistical information in a table\n") );
1606 colorize(ClDefault);
1607 out_r(_("#All values are equal!\n") );
1608 out_r("#n\tmean\tm-conf\tm+conf\tmedian\tme_c_lo\tme_c_up"
1609 "\tquar_lo\tquar_up\tsdv\tvarc(%%)\tsdv_err\tmin\tmax\n");
1610 out_r("%i\t", n);
1611 out_r("%g\t%g\t%g\t", min, min, min);
1612 out_r("%g\t%g\t%g\t", min, min, min);
1613 out_r("%g\t", min);
1614 out_r("%g\t", min);
1615 out_r("%g\t", 0);
1616 out_r("%f\t", 0);
1617 out_r("%g\t", 0);
1618 out_r("%g\t", get_min(x,n));
1619 out_r("%g\t\n", get_max(x,n));
1620
1621 }
1622
1623 out_end();
1624 return;
1625 }
1626 if (min > max) {
1627 out_err(MAT, ERR_FILE, ERR_LINE,
1628 _("Minimum is larger than maximum!") );
1629 out_end();
1630 return;
1631 }
1632
1633 mean = get_mean(x,n);
1634 sdv = get_sdv(x,n);
1635 sdv_mean = sdv/sqrt((REAL)n);
1636 var_coef = (sdv/mean) * 100.;
1637 t = get_t(0.975, (n-1));
1638 conf = (t*sdv)/sqrt((REAL)n);
1639
1640
1641 histogram(x,n,mclass,min,max); /* calc and plot the histogram */
1642
1643
1644 xh = (REAL*)m_calloc(n, sizeof(REAL));
1645
1646 for (i=0; i<n; i++) {
1647 xh[i] = x[i];
1648 }
1649
1650 qsort(xh, n, sizeof(REAL), real_compar_up);
1651
1652 if (n % 2 == 1) {
1653 i = (n-1)/2;
1654 median = xh[i];
1655 }
1656 else {
1657 i = (n/2) -1;
1658 median = xh[i];
1659 i++;
1660 median = (median + xh[i])/2.;
1661 }
1662
1663 index = (REAL)n*0.25;
1664 if (index==floor(index)) {
1665 i_l = (int)index-1;
1666 i_u = (int)index;
1667 }
1668 else {
1669 i_l = (int) floor(index-1.);
1670 i_u = (int) ceil(index-1.);
1671 }
1672
1673 q_l = 0.5 * (xh[i_l]+xh[i_u]);
1674 q_u = 0.5 * (xh[n-i_l-1]+xh[n-i_u-1]);
1675 no_u = median + 1.58*(q_u-q_l)/sqrt((REAL)n);
1676 no_l = median - 1.58*(q_u-q_l)/sqrt((REAL)n);
1677
1678 if (n<6) {
1679 med_conf_lo = med_conf_up = 0.0;
1680 }
1681 else if (n<=50) {
1682 med_conf_lo = xh[critical[n-1]];
1683 med_conf_up = xh[n-critical[n-1]-1];
1684 }
1685 else {
1686 xcrit = (int)floor(-(0.5*sqrt((REAL)n)) *1.96-0.5+0.5*(REAL)n);
1687 med_conf_lo = xh[xcrit];
1688 med_conf_up = xh[n-xcrit-1];
1689 }
1690 /*
1691 printf("crit(50)=%i\n", (int)floor(-(0.5*sqrt(50.0)) *1.96-0.5+0.5*50.0));
1692 */
1693 if(!bernhard)
1694 {
1695 colorize(ClHeader);
1696 out_r(_("\nResult general statistical information:\n") );
1697 colorize(ClDefault);
1698 out_r(_("Count : %i\n"), n);
1699 out_r(_("Mean : %g [%g, %g] (95%%)\n"),
1700 mean, mean-conf, mean+conf);
1701 out_r(_("Median : %g [%g, %g] (95%%)\n"),
1702 median, med_conf_lo, med_conf_up);
1703 /* out_r(" : [%g, %g] (90%%)\n",
1704 no_l, no_u);
1705 */
1706 out_r(_("25%% quartile : %g\n"), q_l);
1707 out_r(_("75%% quartile : %g\n"), q_u);
1708 out_r(_("Standard deviation : %g\n"), sdv);
1709 out_r(_("Variation coefficient : %f %%\n"), var_coef);
1710 out_r(_("Standard error of mean : %g\n"), sdv_mean);
1711 out_r(_("Minimum : %g\n"), get_min(x,n));
1712 out_r(_("Maximum : %g\n\n"), get_max(x,n));
1713 }else
1714 {
1715 colorize(ClHeader);
1716 out_r(_("#Result general statistical information in a table\n") );
1717 colorize(ClDefault);
1718 out_r("#n\tmean\tm-conf\tm+conf\tmedian\tme_c_lo\tme_c_up"
1719 "\tquar_lo\tquar_up\tsdv\tvarc(%%)\tsdv_err\tmin\tmax\n");
1720 out_r("%i\t", n);
1721 out_r("%g\t%g\t%g\t", mean, mean-conf, mean+conf);
1722 out_r("%g\t%g\t%g\t", median, med_conf_lo, med_conf_up);
1723 out_r("%g\t", q_l);
1724 out_r("%g\t", q_u);
1725 out_r("%g\t", sdv);
1726 out_r("%f\t", var_coef);
1727 out_r("%g\t", sdv_mean);
1728 out_r("%g\t", get_min(x,n));
1729 out_r("%g\t\n", get_max(x,n));
1730
1731 }
1732 out_end();
1733
1734 }
1735
1736 /* =================================================================== */
1737
1738
1739 void rank_order(REAL x[], REAL y[], int n) {
1740 REAL rho, z, alpha, a;
1741 int i, df;
1742 /* BOOLEAN infinity = TRUE; */
1743
1744 /* critical raw-Values for significance levels 1%, 2%, 5% and 10% for n <=30
1745 indexes (two-tailed), is valid only for n >= 5! (from: SACHS, p. 230)
1746 */
1747
1748 const float sig[31][4] = {
1749 {0.0000,0.0000,0.0000,0.0000}, {0.0000,0.0000,0.0000,0.0000},
1750 {0.0000,0.0000,0.0000,0.0000}, {0.0000,0.0000,0.0000,0.0000},
1751 {0.0000,0.0000,0.0000,0.0000}, {0.9001,0.9000,0.9000,0.8000},
1752 {0.9429,0.8857,0.8286,0.7714}, {0.8929,0.8571,0.7450,0.6786},
1753 {0.8571,0.8095,0.6905,0.5952}, {0.8167,0.7667,0.6833,0.5833},
1754 {0.7818,0.7333,0.6364,0.5515}, {0.7454,0.7000,0.6091,0.5273},
1755 {0.7273,0.6713,0.5804,0.4965}, {0.6978,0.6429,0.5549,0.4780},
1756 {0.6474,0.6220,0.5341,0.4593}, {0.6536,0.6000,0.5179,0.4429},
1757 {0.6324,0.5824,0.5000,0.4265}, {0.6152,0.5637,0.4853,0.4118},
1758 {0.5975,0.5480,0.3994,0.3994}, {0.5825,0.5333,0.4579,0.3895},
1759 {0.5684,0.5203,0.4451,0.3789}, {0.5545,0.5078,0.4351,0.3688},
1760 {0.5426,0.4963,0.4241,0.3597}, {0.5306,0.4852,0.4150,0.3518},
1761 {0.5200,0.4748,0.4061,0.3435}, {0.5100,0.4654,0.3977,0.3362},
1762 {0.5002,0.4564,0.3894,0.3299}, {0.4915,0.4481,0.3822,0.3236},
1763 {0.4828,0.4401,0.3749,0.3175}, {0.4744,0.4320,0.3685,0.3113},
1764 {0.4665,0.4251,0.3620,0.3059}
1765 };
1766
1767
1768 if ((rho=get_rank_correlation(x, y, n))==REAL_MIN) {
1769 return;
1770 }
1771
1772 out_start();
1773 df = n-2;
1774 colorize(ClHeader);
1775 out_r(_("\nResult SPEARMAN's Rank-Correlation:\n") );
1776 colorize(ClDefault);
1777 out_r(_("rho = %f\n"), rho);
1778 out_r(_("Degrees of freedom = n-2 = %i\n\n"), df);
1779
1780 out_r(_("Hypothesis H0: rho=0 versus hypothesis H1: rho#0 (->two-sided)\n"));
1781
1782 if (n<5) {
1783 out_r(_("Test not possible since n<5 (too few values!)\n\n") );
1784 out_end();
1785 return;
1786 }
1787 else if ((n>=5) && (n<=30)) {
1788 for (i=0; i<4; i++) {
1789 if (fabs(rho) > (REAL)sig[n][i]) {
1790 break;
1791 }
1792 }
1793 switch (i) {
1794 case 0: alpha = 0.01;
1795 break;
1796 case 1: alpha = 0.02;
1797 break;
1798 case 2: alpha = 0.05;
1799 break;
1800 case 3: alpha = 0.10;
1801 break;
1802 default: alpha = 1.0;
1803 break;
1804 }
1805
1806 if (alpha < 1.0) {
1807 out_r(_("H0 rejected at a level of significance of %4.2f\n\n"),
1808 alpha);
1809 }
1810 else {
1811 out_r(_("Alpha > 0.1 ==> H0 can not be rejected\n\n") );
1812 }
1813 }
1814 else {
1815 /*
1816 a = get_norm_int(2.326);
1817 alpha = 1.- ((1.-a)*2.0);
1818 out_d("norm(2.326)=%f\n", alpha);
1819 */
1820 z = fabs(rho) * sqrt((REAL)n-1.0);
1821 out_r(_("Significance checked using the normal distribution\n") );
1822 out_d("z = %f\n", z);
1823 a = get_norm_int(z);
1824 alpha = 1.- ((1.-a)*2.0);
1825 out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
1826 }
1827 out_end();
1828 }
1829
1830 /* =================================================================== */
1831
1832 void t_test(REAL x[], int nx, REAL y[], int ny) {
1833 REAL denom, t, x_mean, y_mean;
1834 REAL x_sum, y_sum, x_qsum, y_qsum, alpha, a;
1835 REAL x_qmean, y_qmean;
1836 int df;
1837 /* t-Test for difference between two means of two samples */
1838
1839 x_sum = get_sum(x,nx);
1840 y_sum = get_sum(y,ny);
1841 x_qsum = get_qsum(x,nx);
1842 y_qsum = get_qsum(y,ny);
1843
1844 x_mean = x_sum/(REAL)nx;
1845 y_mean = y_sum/(REAL)ny;
1846
1847
1848 DIV(x_qmean, SQR(x_sum), (REAL)nx);
1849 DIV(y_qmean, SQR(y_sum), (REAL)ny);
1850 DIV(denom, (x_qsum - x_qmean + y_qsum - y_qmean), (REAL)(nx + ny - 2));
1851 denom *= (1./nx + 1./ny);
1852 DIV(t, fabs(x_mean - y_mean), sqrt(denom));
1853 df = nx + ny -2;
1854
1855 out_start();
1856 colorize(ClHeader);
1857 out_r(_("\nResult t-Test for independent samples\n") );
1858 colorize(ClDefault);
1859 out_r(_("Degrees of freedom = n1 + n2 - 2 = %i\n"), df);
1860 if (t !=0 ) {
1861 alpha = get_t_int(fabs(t), df);
1862 out_r("t = %f\n", t);
1863 out_r(_("\nHypothesis H0: mu1=mu2 versus hypothesis H1: mu1#mu2 (->two-sided)\n") );
1864 a = alpha;
1865 out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-a);
1866 }
1867 else {
1868 out_r(_("t-Test not possible since t = 0!\n") );
1869 }
1870
1871 /*
1872 out_r("Hypothese H0: mu1=mu2 gegen Hypothese H1: mu1>mu2 (->einseitig)\n");
1873 sgn = get_sgn(t);
1874 a = 1.- 0.5*(1. + alpha*sgn);
1875 out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
1876
1877 out_r("Hypothese H0: mu1=mu2 gegen Hypothese H1: mu1<mu2 (->einseitig)\n");
1878 a = 1.- 0.5*(1. - alpha*sgn);
1879 out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
1880 */
1881 out_end();
1882 }
1883
1884 /* ====================================================================== */
1885
1886
1887
1888 void pair_t_test(REAL x[], REAL y[], int n) {
1889 REAL t, alpha, a, dif_sdv, dif_mean, *dif;
1890 int df, i;
1891 /* t-Test for difference between two means of two paired samples */
1892
1893 dif = (REAL*)m_calloc(n, sizeof(REAL));
1894 for (i=0; i<n; i++) {
1895 dif[i] = x[i]-y[i];
1896 }
1897 dif_sdv = get_sdv(dif, n);
1898 dif_mean = get_mean(dif, n);
1899 DIV(t, dif_mean*sqrt((REAL)n), sqrt(SQR(dif_sdv)));
1900 t = fabs(t);
1901 /* t = fabs( (dif_mean*sqrt((REAL)n))/(sqrt(SQR(dif_sdv))) ); */
1902 df = n-1;
1903
1904 out_start();
1905 colorize(ClHeader);
1906 out_r(_("\nResult t-Test for pairwise ordered samples\n") );
1907 colorize(ClDefault);
1908 out_r(_("Degrees of freedom n-1 = %i\n"), df);
1909 if (t !=0 ) {
1910 alpha = get_t_int(fabs(t), df);
1911 out_r("t = %f\n", t);
1912 out_r(_("\nHypothesis H0: mu1=mu2 versus hypothesis H1: mu1#mu2 (->two-sided)\n") );
1913 a = alpha;
1914 out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-a);
1915 }
1916 else {
1917 out_r(_("t-Test not possible since t = 0!\n") );
1918 }
1919
1920 /*
1921 out_r("Hypothese H0: mu1=mu2 gegen Hypothese H1: mu1>mu2 (->einseitig)\n");
1922 sgn = get_sgn(t);
1923 a = 1.- 0.5*(1. + alpha*sgn);
1924 out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
1925
1926 out_r("Hypothese H0: mu1=mu2 gegen Hypothese H1: mu1<mu2 (->einseitig)\n");
1927 a = 1.- 0.5*(1. - alpha*sgn);
1928 out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
1929 */
1930 out_end();
1931 }
1932
1933 /* ====================================================================== */
1934
1935
1936
1937 void u_test(REAL x[], int nx, REAL y[], int ny) {
1938
1939 SORTREC *vrec;
1940 int i, k, j, n;
1941 REAL m, ux, uy, rx, ry, umin, z, alpha, t, denom, arg;
1942
1943 n = nx+ny;
1944
1945 vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
1946
1947 for (i=0; i<nx; i++) {
1948 vrec[i].val = x[i];
1949 vrec[i].ind = 0;
1950 }
1951
1952 for (i=nx; i<n; i++) {
1953 vrec[i].val = y[i-nx];
1954 vrec[i].ind = 1;
1955 }
1956
1957 qsort(vrec, n, sizeof(SORTREC), u_rank_compar);
1958
1959 i = 0;
1960 k = 0;
1961 m = 0.;
1962 t = 0.;
1963 while (i<n) {
1964 vrec[i].rank = (REAL)i+1.;
1965 if ( (i<(n-1)) && (equal(vrec[i].val, vrec[i+1].val)) ) {
1966 k++;
1967 m += (REAL)i;
1968 }
1969 else {
1970 if (k!=0) {
1971 m += (REAL)i;
1972 k ++;
1973 /* t += (pow((REAL)k, 3.) - (REAL)k)/12.; */
1974 t += ( (REAL)k * (SQR((REAL)k) -1.) ) /12.;
1975 m = m/ (REAL)(k);
1976 for (j=i; j>(i-k); j--) {
1977 vrec[j].rank = m+1.;
1978 }
1979 }
1980 k=0;
1981 m=0.;
1982 }
1983 i++;
1984 }
1985
1986 rx = 0.0;
1987 ry = 0.0;
1988
1989 for (i=0; i<n; i++) {
1990 if (vrec[i].ind == 0) {
1991 rx += vrec[i].rank;
1992 }
1993 else {
1994 ry += vrec[i].rank;
1995 }
1996 /*
1997 out_r("Nr %i ind %i Wert %f Rank %f\n",
1998 i, vrec[i].ind, vrec[i].val, vrec[i].rank);
1999 */
2000 }
2001
2002 ux = (REAL)nx*(REAL)ny + ( (REAL)nx*(REAL)(nx+1) )/2. - rx;
2003 uy = (REAL)nx*(REAL)ny + ( (REAL)ny*(REAL)(ny+1) )/2. - ry;
2004
2005 umin = (ux<uy) ? ux : uy;
2006 /* z = fabs(umin-((REAL)nx*(REAL)ny)/2.) /
2007 sqrt( ( (REAL)nx*(REAL)ny*( (REAL)nx+(REAL)ny+1.) )/12.);
2008 */
2009 arg = (REAL)nx*(REAL)ny/((REAL)n*(REAL)(n-1))*((REAL)n*(SQR((REAL)n)-1.)/12. -t);
2010 SQRT(denom, arg);
2011 DIV(z, fabs(umin-((REAL)nx*(REAL)ny)/2.), denom);
2012 /*
2013 z = fabs(umin-((REAL)nx*(REAL)ny)/2.) /
2014 sqrt( ( (REAL)nx*(REAL)ny/((REAL)n*(REAL)(n-1)) *
2015 ( (REAL)n * (SQR((REAL)n)-1.)/12. -t ) ) );
2016 */
2017 out_start();
2018 colorize(ClHeader);
2019 out_r(_("\nResult u-Test:\n") );
2020 colorize(ClDefault);
2021 out_r("Rx = %f Ry = %f\n", rx, ry);
2022 out_r("Ux = %f Uy = %f\n", ux, uy);
2023 out_r("nx = %i ny = %i\n", nx, ny);
2024 out_r("U = %f\n", umin);
2025
2026 out_r(_("\nHypothesis H0: x and y originate from the same set versus\n") );
2027 /* Test on x > y if ux < uy. */
2028 if( ux < uy ) {
2029 out_r(_("Hypothesis H1: x stochastically larger than y (-> one-sided test) or\n") );
2030
2031 } else { /* ux >= uy */
2032 out_r(_("Hypothesis H1: x stochastically smaller than y (-> one-sided test) or\n") );
2033 }
2034 out_r(_(" x is different from y (-> two-sided test)\n\n") );
2035
2036
2037 /* if ( ((nx>=8) && (ny>=8)) || (nx>20) || (ny>20) ) { */
2038 if ( ((nx<7) || (ny<7)) || (nx<=19) || (ny<=19) ) {
2039 out_r(_("Warning: Only rough approximation of probability possible!\n") );
2040 out_r(_("Please check exact probability of alpha for h having %i "
2041 "degrees of freedom\n"), ncol-1);
2042 out_r(_("in the literature, e.g. in Table 16/17, pp. 599 in WEBER \n\n") );
2043 }
2044
2045 out_r(_("Normally distributed random variable z = %f\n"), z);
2046 out_r(_("Correction term for equal ranks t = %f\n"), t);
2047 alpha = get_norm_int(z);
2048 out_r(_("Probability of H0 (one-sided) = %6.4f\n"), 1.0-alpha);
2049 alpha = 1.- ((1.-alpha)*2.);
2050 out_r(_("Probability of H0 (two-sided) = %6.4f\n\n"), 1.0-alpha);
2051 out_end();
2052 }
2053
2054
2055 /* ====================================================================== */
2056
2057
2058 void kruskal_test(PREAL x[], int nrow[], int ncol) {
2059 SORTREC *vrec;
2060 unsigned int n=0, ir, ic, k=0, i, j;
2061 REAL *r, nr, t, m, cor, prob, h=0.;
2062
2063 for (ic=0; ic<ncol; ic++) {
2064 n += nrow[ic];
2065 }
2066
2067 vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
2068 r = (REAL*)m_calloc(ncol, sizeof(REAL));
2069
2070 for (ic=0; ic<ncol; ic++) {
2071 r[ic] = 0.;
2072 for (ir=0; ir<nrow[ic]; ir++) {
2073 vrec[k].val = x[ic][ir];
2074 vrec[k].ind = ic;
2075 k++;
2076 }
2077 }
2078 out_start();
2079 #ifdef DEBUG
2080 if (n != k) {
2081 out_r(_("Error: n!=k\n") );
2082 }
2083 #endif
2084
2085 qsort(vrec, n, sizeof(SORTREC), u_rank_compar);
2086
2087 i = 0;
2088 k = 0;
2089 m = 0.;
2090 t = 0.;
2091 while (i<n) {
2092 vrec[i].rank = (REAL)i+1.;
2093 if ( (i<(n-1)) && (equal(vrec[i].val, vrec[i+1].val)) ) {
2094 k++;
2095 m += (REAL)i;
2096 }
2097 else {
2098 if (k!=0) {
2099 m += (REAL)i;
2100 k ++;
2101 t += (REAL)k * (SQR((REAL)k)-1.);
2102 m = m/ (REAL)(k);
2103 for (j=i; j>(i-k); j--) {
2104 vrec[j].rank = m+1.;
2105 }
2106 }
2107 k=0;
2108 m=0.;
2109 }
2110 i++;
2111 }
2112
2113 for (i=0; i<n; i++) {
2114 r[vrec[i].ind] += vrec[i].rank;
2115 }
2116
2117
2118 nr = (REAL)n;
2119 cor = 1. - t/(SQR(nr)*(nr-1.));
2120
2121 for (ic=0; ic<ncol; ic++) {
2122 h += (SQR(r[ic])/(REAL)nrow[ic]);
2123 /* out_r("r[%u] = %g\n", ic, r[ic]); */
2124 }
2125
2126 h = (12./(nr*(nr+1.)) * h - 3.*(nr+1.))/cor;
2127
2128 colorize(ClHeader);
2129 out_r(_("\nResult Kruskal-Wallis-Test:\n") );
2130 colorize(ClDefault);
2131 out_r(_("Chi^2-distributed random variable H = %g\n"), h);
2132 out_r(_("Correction term for equal ranks (ties) = %g\n"), cor);
2133 out_r(_("Degrees of freedom = %i\n"), ncol-1);
2134
2135 out_r(_("\nHypothesis H0: Samples originate from the same set versus\n") );
2136 out_r(_("Hypothesis H1: Samples do not originate from the same set\n") );
2137
2138 /* if ((ncol>=3) && (nrow[0]>=5) && (nrow[1]>=5) && nrow[2]>=5) { */
2139 if ((ncol<2) || (nrow[0]<4) || (nrow[1]<4) || nrow[2]<4) {
2140 out_r(_("Warning: Only rough approximation of probability possible!\n") );
2141 out_r(_("Please check exact probability of alpha for h having %i degrees "
2142 "of freedom\n"), ncol-1);
2143 out_r(_("in the literature, e.g. in Table 16/17, pp. 599 in WEBER \n\n") );
2144 }
2145 if (h > 0.0) {
2146 prob = get_chi_int(h, (ncol-1));
2147 out_r(_("Probability alpha for H0 = %6.4f\n\n"), prob);
2148 }
2149 else {
2150 out_err(MAT, ERR_FILE, ERR_LINE,
2151 _("Calculation of Chi^2-distribution not possible since h<0!") );
2152 }
2153 out_end();
2154 }
2155
2156
2157 /* ====================================================================== */
2158
2159 void vierfeld_test(REAL x[], REAL y[], int n) {
2160 const REAL pi=3.14159265358979323846;
2161 long unsigned int a=0, b=0, c=0, d=0, i, xi, yi;
2162 REAL chi, r, T, prob, alpha, delta, beta, gamma;
2163
2164 out_start();
2165 if (n!=2) {
2166 out_r(_("Characteristics are counted (1='existent', 0='not existent')\n\n"));
2167 for (i=0; i<n; i++) {
2168 xi = get_round(x[i]);
2169 yi = get_round(y[i]);
2170 if ( ((xi!=1) && (xi!=0)) || ((yi!=1) && (yi!=0)) ) {
2171 out_err(ERR, ERR_FILE, ERR_LINE,
2172 _("Columns must contain only 0's and 1's!") );
2173 out_end();
2174 return;
2175 }
2176 if ( xi && yi) { a++; }
2177 if (!xi && yi) { b++; }
2178 if ( xi && !yi) { c++; }
2179 if (!xi && !yi) { d++; }
2180 }
2181 }
2182 else {
2183 out_r(_("Values are being interpreted as fourfold-table:\n\n") );
2184 a = (unsigned int) x[0];
2185 b = (unsigned int) y[0];
2186 c = (unsigned int) x[1];
2187 d = (unsigned int) y[1];
2188 n = a+b+c+d;
2189 }
2190
2191 out_r( _("Fourfold-table:\n\n") );
2192 out_r( _(" | | A existent | A not existent |\n"));
2193 out_r(" +-------------------+---------------+---------------------+\n");
2194 out_r( _(" | B existent | a | b |\n"));
2195 out_r( _(" | B not existent | c | d |\n"));
2196 out_r(" +-------------------+---------------+---------------------+\n\n");
2197
2198 alpha = ((REAL)(a+b) * (REAL)(a+c))/(REAL)n;
2199 delta = ((REAL)(d+b) * (REAL)(d+c))/(REAL)n;
2200 beta = ((REAL)(a+b) * (REAL)(b+d))/(REAL)n;
2201 gamma = ((REAL)(c+d) * (REAL)(a+c))/(REAL)n;
2202
2203 if ((alpha>=5.) && (delta>=5.) && (beta>=5.) && (gamma>=5.)) {
2204 chi = (SQR( (REAL)a*(REAL)d - (REAL)b*(REAL)c ) *(REAL)n) /
2205 ( (REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d) );
2206 }
2207 else {
2208 out_r(_("Using according to YATES corrected Chi^2 value, since frequencies <= 5 expected!\n\n") );
2209
2210 chi = (SQR(fabs( (REAL)a*(REAL)d - (REAL)b*(REAL)c) -0.5*(REAL)n)*(REAL)n)/
2211 ((REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d));
2212 }
2213 T = (((REAL)a*(REAL)d - (REAL)b*(REAL)c))/
2214 (sqrt( (REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d) ));
2215 r = sin(T*(pi/4.));
2216
2217 out_r(_("observed: a=%lu, b=%lu, c=%lu, d=%lu, n=%i\n"),
2218 a, b, c, d, n);
2219 out_r(_("expected: a=%4.2f, b=%4.2f, c=%4.2f, d=%4.2f, n=%i\n"),
2220 alpha, beta, gamma, delta, n);
2221 out_r( _("Chi^2 = %f\n"), chi);
2222 out_r(_("Contingency coefficient r (according to HAMMING) = %f\n\n"), r);
2223 out_r(_("Chi^2-test:\n") );
2224 out_r(_("Hypothesis H0: Both items are independent (uncorrelated)\n") );
2225 out_r(_("versus H1: Both items are dependent (correlated)\n") );
2226 prob = get_chi_int(chi, 1);
2227 out_r(_("Probability of H0: %6.4f\n\n"), prob);
2228 out_end();
2229 }
2230
2231
2232 /* ====================================================================== */
2233
2234
2235 void tafel_test(PREAL xx[], int nrow, int ncol) {
2236 REAL fieldsum=0.0, sum=0.0, *colsum, *rowsum, lncols=0.0, lnrows=0.0,
2237 prob, g, fsum, chi=0.0;
2238 int i, k, df, max_err = 100, zero_cols=0, zero_rows=0;
2239 PREAL *e;
2240 BOOLEAN freq_too_small=FALSE, printall=TRUE;
2241
2242 colsum = (REAL*)m_calloc(ncol, sizeof(REAL));
2243 rowsum = (REAL*)m_calloc(nrow, sizeof(REAL));
2244 e = (PREAL*) m_calloc(ncol, sizeof(PREAL));
2245 for (i=0; i<ncol; i++) {
2246 e[i] = (REAL*)m_calloc(nrow, sizeof(REAL));
2247 }
2248
2249 out_start();
2250 for (i=0; i<ncol; i++) {
2251 colsum[i] = 0.0;
2252 for (k=0; k<nrow; k++) {
2253 if(xx[i][k] < 0.0){
2254 out_err(MAT, ERR_FILE, ERR_LINE,
2255 _("Column \"%s\", line %i, has value < 0"),
2256 get_name(xx[i]), k);
2257 return;
2258 }
2259 fieldsum += xx[i][k] * get_ln_0(xx[i][k]);
2260 sum += xx[i][k];
2261 colsum[i] += xx[i][k];
2262 }
2263 if (colsum[i] == 0.0) {
2264 zero_cols ++;
2265 }
2266 out_r("\n");
2267 }
2268
2269 for (k=0; k<nrow; k++) {
2270 rowsum[k] = 0.0;
2271 for (i=0; i<ncol; i++) {
2272 rowsum[k] += xx[i][k];
2273 }
2274 if (rowsum[k] == 0.0) {
2275 zero_rows ++;
2276 }
2277 }
2278
2279 for (k=0; k<nrow; k++) {
2280 for (i=0; i<ncol; i++) {
2281 e[i][k] =(colsum[i]*rowsum[k])/sum;
2282 if (e[i][k] != 0.0) {
2283 chi += SQR(xx[i][k]-e[i][k])/e[i][k];
2284 }
2285 if (e[i][k] < 5.) {
2286 freq_too_small = TRUE;
2287 }
2288 }
2289 }
2290
2291
2292 out_r(_("Analysis of two-items table:\n\n") );
2293
2294 for (i=0; i<ncol; i++) {
2295 if(colsum[i] < 0.0){
2296 out_err(MAT, ERR_FILE, ERR_LINE,
2297 _("Sum of values of column \"%s\" is lower than 0"),
2298 get_name(xx[i]));
2299 return;
2300 }
2301 lncols += colsum[i] * get_ln_0(colsum[i]);
2302 }
2303
2304 for (k=0; k<nrow; k++) {
2305 if(rowsum[k] < 0.0){
2306 out_err(MAT, ERR_FILE, ERR_LINE,
2307 _("Sum of values of row %i is lower than 0"), k);
2308 return;
2309 }
2310 lnrows += rowsum[k] * get_ln_0(rowsum[k]);
2311 }
2312
2313 if(sum < 0.0){
2314 out_err(MAT, ERR_FILE, ERR_LINE,
2315 _("Sum of all values of all columns is lower than 0"));
2316 return;
2317 }
2318 fsum = sum * get_ln_0(sum);
2319 g = 2 * (fieldsum-lncols-lnrows+fsum);
2320 df = (ncol-1-zero_cols) * (nrow-1-zero_rows);
2321
2322 /*
2323 out_r("Transformierte Zeilensummen = %f\n", lnrows);
2324 out_r("Transformierte Spaltensummen = %f\n", lncols);
2325 out_r("Transformierte Haeufigkeiten = %f\n", fieldsum);
2326 out_r("Transformierte Gesamtsumme = %f\n", fsum);
2327
2328 */
2329
2330
2331 out_r( _(" Item A | Item B \n") );
2332 out_r(" |");
2333 for (i=0; i<ncol; i++) {
2334 out_r(_("Class%3i |"), (i+1));
2335 }
2336 out_r(_(" Sum |\n") );
2337 for (i=-2; i<ncol; i++) {
2338 out_r("----------+");
2339 }
2340 out_r("\n");
2341
2342
2343 for (k=0; k<nrow; k++) {
2344 if(k == max_err){
2345 printall = FALSE;
2346 out_i(_("There are %i rows to be printed yet. "
2347 "Do you want to see them? (%s) "), (nrow - max_err), _("y/N"));
2348 GETNLINE;
2349 if(!(empty) && (line[0] ==_("y")[0] || line[0] != _("Y")[0])){
2350 printall = TRUE;
2351 }
2352 }
2353 if(printall){
2354 out_r(_("Class%3i |"), k+1);
2355 for (i=0; i<ncol; i++) {
2356 out_r("%7u |", (unsigned int)xx[i][k]);
2357 }
2358 out_r("%7i |\n", (unsigned int)rowsum[k]);
2359 out_r(_("exp. frq. |") );
2360 for (i=0; i<ncol; i++) {
2361 out_r("%7u |", (unsigned int)e[i][k]);
2362 }
2363 out_r(" |\n");
2364 if (k<(nrow-1)) {
2365 for (i=-2; i<ncol; i++) {
2366 out_r("----------+");
2367 }
2368 out_r("\n");
2369 }
2370 }
2371 }
2372
2373 for (i=-2; i<ncol; i++) {
2374 out_r("----------+");
2375 }
2376 out_r(_("\n Sum |") );
2377 for (i=0; i<ncol; i++) {
2378 out_r("%7i |", (unsigned int)colsum[i]);
2379 }
2380 out_r("%7i |\n", (unsigned int)sum);
2381
2382 out_r( _("\nChi^2 = %f\n"), chi);
2383 out_r(_("G (check value for Chi^2-Test) = %f\n"), g);
2384 out_r(_("Degrees of freedom = %i\n\n"), df);
2385 if (freq_too_small) {
2386 out_r(_("Warning: Expected frequencies < 5!\n\n") );
2387 }
2388
2389 out_r(_("Chi^2-test:\n") );
2390 out_r(_("Hypothesis H0: Both items are independent (uncorrelated)\n") );
2391 out_r(_("versus H1: Both items are dependent (correlated)\n") );
2392 prob = get_chi_int(g, df);
2393 out_r(_("Probability of H0: %6.4f\n\n"), prob);
2394 out_end();
2395
2396 }
2397
2398
2399 /* ====================================================================== */
2400
2401 void wilcoxon_test(REAL x[], REAL y[], int n) {
2402 SORTREC *vrec;
2403 int i, k, j, nv, alpha, navg, t;
2404 REAL *diff, *avg;
2405 REAL m, s_plus, s_minus, s_min, d, nr, nd, prob, z, median, conf_u, conf_l;
2406 const short int sig[20][3] = {
2407 { 0, -1, -1 }, { 2, 0, -1 }, { 4, 2, 0 }, { 6, 3, 2 },
2408 { 8, 5, 3 }, { 11, 7, 5 }, { 14, 10, 7 }, { 17, 13, 10 },
2409 { 21, 16, 13 }, { 25, 20, 16 }, { 30, 24, 20 }, { 35, 28, 23 },
2410 { 40, 33, 28 }, { 46, 38, 32 }, { 52, 43, 38 }, { 59, 49, 43 },
2411 { 66, 56, 49 }, { 73, 62, 55 }, { 81, 69, 61 },
2412 { 89, 77, 68 }
2413 };
2414
2415
2416 diff = (REAL*) m_calloc(n, sizeof(REAL));
2417 vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
2418
2419 nv = 0;
2420 for (i=0; i<n; i++) {
2421 d = x[i]-y[i];
2422 diff[i] = d;
2423 if (d != 0.0) {
2424 vrec[nv].val = d;
2425 nv ++;
2426 }
2427 }
2428
2429 if (nv == 0) {
2430 out_err(MWA, ERR_FILE, ERR_LINE,
2431 _("All value pairs are equal. WILCOXON-Test thus not possible/has no meaning."));
2432 return;
2433 }
2434
2435 qsort(vrec, nv, sizeof(SORTREC), wilcoxon_rank_compar);
2436
2437 i = 0;
2438 k = 0;
2439 m = 0.;
2440 while (i<nv) {
2441 vrec[i].rank = (REAL)i+1.;
2442 if ( (i<(nv-1)) && (equal(fabs(vrec[i].val), fabs(vrec[i+1].val))) ) {
2443 k++;
2444 m += (REAL)i;
2445 }
2446 else {
2447 if (k!=0) {
2448 m += (REAL)i;
2449 k ++;
2450 m = m / (REAL)(k);
2451 for (j=i; j>(i-k); j--) {
2452 vrec[j].rank = m+1.;
2453 }
2454 }
2455 k=0;
2456 m=0.;
2457 }
2458 i++;
2459 }
2460
2461 s_plus = 0.0;
2462 s_minus = 0.0;
2463 for (i=0; i<nv; i++) {
2464 /* printf("diff=%20.17f rang=%f\n", vrec[i].val, vrec[i].rank); */
2465 if (vrec[i].val > 0.0) {
2466 s_plus += vrec[i].rank;
2467 }
2468 else {
2469 s_minus += vrec[i].rank;
2470 }
2471 }
2472
2473 median = get_median(diff, n);
2474 navg = n*(n+1)/2;
2475 avg = (REAL*) m_calloc(navg, sizeof(REAL));
2476 nr = (REAL)nv;
2477 nd = (REAL)m;
2478
2479 k = 0;
2480 for (i=0; i<n; i++) {
2481 for (j=i; j<n; j++) {
2482 avg[k] = (diff[i] + diff[j])/2.0;
2483 k ++;
2484 }
2485 }
2486
2487 qsort(avg, navg, sizeof(REAL), real_compar_up);
2488 /* nr = 50; */
2489 if (n<26) {
2490 t = sig[n-6][2];
2491 }
2492 else {
2493 z = get_z(0.99);
2494 t = (int) (nd*(nd+1.0)/4.0 - z*sqrt(nd*(nr+1.0)*(2.0*nd+1.0)/24.0) - 0.5);
2495 }
2496 if ( (t<0) || (t>=navg) ) {
2497 t = 0;
2498 }
2499
2500 conf_l = avg[t];
2501 conf_u = avg[navg-t-1];
2502
2503 /*
2504 for (i=0; i<navg; i++) {
2505 printf("%i %f\n", i, avg[i]);
2506 }
2507
2508 printf("t= %i, z=%f\n", t, z);
2509
2510 mean = get_mean(diff, nv);
2511
2512
2513
2514 if (nv % 2 == 1) {
2515 i = (nv-1)/2;
2516 median = vrec[i].val;
2517 }
2518 else {
2519 i = (nv/2) -1;
2520 median = vrec[i].val;
2521 i++;
2522 median = (median + vrec[i].val)/2.;
2523 }
2524 */
2525
2526 s_min = (s_plus<s_minus) ? s_plus : s_minus;
2527 out_start();
2528 colorize(ClHeader);
2529 out_r(_("\nResult Wilcoxon-Test:\n") );
2530 colorize(ClDefault);
2531 out_r(_("Positive rank sum S+ : %g\n"), s_plus);
2532 out_r(_("Negative rank sum S- : %g\n"), s_minus);
2533 out_r(_("Smallest rank sum S : %g\n"), s_min);
2534 out_r(_("Number of value pairs : %i\n"), n);
2535 out_r(_("Size of the set : %i\n"), nv);
2536 out_r(_("Number 0-differences : %i\n"), n-nv);
2537 out_r(_("Mean of differences : %g +/- %g\n"), get_mean(diff, n),
2538 get_sdv(diff, n));
2539 out_r(_("Median of differences : %f [%f, %f] (99%%)\n\n"),
2540 median, conf_l, conf_u);
2541 /* out_r("Mittel der Differenzen: %f\n\n", mean); */
2542
2543 out_r(_("Hypothesis H0: x and y are 'treated' equally versus\n") );
2544 out_r(_("Hypothesis H1: x and y are 'treated' unequally (-> two-sided)\n\n"));
2545
2546 if (nv<6) {
2547 out_r(_("Calculation of probability not possible if n < 6!\n") );
2548 out_end();
2549 return;
2550 }
2551 else if (nv<26) {
2552 i = nv-6;
2553 out_r(_("Critical values for S (two-sided) from table:\n") );
2554 out_r(" 5%% 2%% 1%%\n");
2555 out_r(" %4hi %4hi %4hi\n", sig[i][0], sig[i][1], sig[i][2]);
2556
2557 if (s_min <= (REAL)sig[i][2]) {
2558 alpha = 1;
2559 }
2560 else if (s_min <= (REAL)sig[i][1]) {
2561 alpha = 2;
2562 }
2563 else if (s_min <= (REAL)sig[i][0]) {
2564 alpha = 5;
2565 }
2566 else {
2567 alpha = -1;
2568 }
2569
2570 if (alpha != -1) {
2571 out_r(_("H0 rejected at level of significance of %i%% (two-sided)\n\n"),
2572 alpha);
2573 }
2574 else {
2575 out_r(_("H0 can not be rejected\n\n") );
2576 }
2577 }
2578 /* else { */
2579
2580 z = (s_min - (nr*(nr+1.0))/4.0) /
2581 sqrt( (nr*(nr+1.0)*(2.0*nr+1.0))/24.0 );
2582
2583 out_r(_("normally distributed variable z = %f\n"), z);
2584 prob = get_norm_int(z);
2585 out_r(_("Probability of H0 (one-sided) = %6.4f\n"), prob);
2586 out_r(_("Probability of H0 (two-sided) = %6.4f\n\n"), prob*2.0);
2587 /* } */
2588 out_end();
2589 }
2590
2591 /* ====================================================================== */
2592
2593
2594 void outlier(int xx_ind, int n) {
2595 PREAL x = xx[xx_ind];
2596 FILE *outfile;
2597 char analias[80];
2598 int i, iy, iout, i_l, i_u;
2599 REAL index, q_l, q_u, w_l, w_u, m_l, m_u, max, min;
2600 REAL median, mean, no_l, no_u;
2601 REAL *xs;
2602
2603 if (get_min(x,n)==get_max(x,n)) {
2604 out_err(MAT, ERR_FILE, ERR_LINE, _("All values are equal!"));
2605 return;
2606 }
2607 xs = (REAL*) m_calloc(n, sizeof(REAL));
2608 for (i=0; i<n; i++) {
2609 xs[i] = x[i];
2610 }
2611 qsort(xs, n, sizeof(REAL), real_compar_up);
2612
2613 if (n % 2 == 1) {
2614 i = (n-1)/2;
2615 median = xs[i];
2616 }
2617 else {
2618 i = (n/2) -1;
2619 median = xs[i];
2620 i++;
2621 median = (median + xs[i])/2.;
2622 }
2623
2624 mean = get_mean(xs, n);
2625 max = xs[n-1];
2626 min = xs[0];
2627 index = (REAL)n*0.25;
2628 if (index==floor(index)) {
2629 i_l = (int)index-1;
2630 i_u = (int)index;
2631 }
2632 else {
2633 i_l = (int) floor(index-1.);
2634 i_u = (int) ceil(index-1.);
2635 }
2636
2637 q_l = 0.5 * (xs[i_l]+xs[i_u]);
2638 q_u = 0.5 * (xs[n-i_l-1]+xs[n-i_u-1]);
2639
2640 m_l = q_l - 1.5*(q_u-q_l);
2641 m_u = q_u + 1.5*(q_u-q_l);
2642
2643 w_l = max;
2644 w_u = min;
2645
2646 for (i=0; i<n; i++) {
2647 if ((xs[i]>w_u) && (xs[i]<=m_u)) {
2648 w_u = xs[i];
2649 }
2650 if ((xs[i]<w_l) && (xs[i]>=m_l)) {
2651 w_l = xs[i];
2652 }
2653 }
2654
2655 no_u = median + 1.58*(q_u-q_l)/sqrt((REAL)n);
2656 no_l = median - 1.58*(q_u-q_l)/sqrt((REAL)n);
2657
2658 if (!noplot) {
2659 plot_box(x, n, median, mean, q_l, q_u, w_l, w_u, no_l, no_u, get_label(x));
2660 }
2661
2662 out_start();
2663 iout = 0;
2664 for (i=0; i<n; i++) {
2665 if ((x[i]>w_u) || (x[i]<w_l)) {
2666 iout ++;
2667 out_r(_("Outliers: x[%i]=%f\n"), i+1, x[i]);
2668 }
2669 }
2670 out_r(_("\n%i possible outliers found\n\n"), iout);
2671
2672 if (iout == 0) {
2673 out_end();
2674 return;
2675 }
2676 #ifndef STATIST_X
2677 out_i(_("Delete outliers? (%s) "), _("y/N") );
2678 GETNLINE;
2679 if(!(empty) && (line[0]!=_("y")[0] || line[0] != _("Y")[0])){
2680 out_end();
2681 return;
2682 }
2683
2684 iy = 0;
2685 strcpy(analias, "out_");
2686 strncat(analias, alias[acol[0]], 79-strlen(analias));
2687 make_new_col(analias, nn[xx_ind]);
2688 outfile = tmpptr[ncol - 1];
2689 alloc_cols(1, 3);
2690 x = xx[xx_ind]; /* We redeclare `x' because alloc_cols delete all columns from
2691 memory */
2692 for (i=0; i<nn[xx_ind]; i++) {
2693 if (!((x[i]>w_u) || (x[i]<w_l))) {
2694 iy ++;
2695 FWRITE(&x[i], sizeof(REAL), 1, outfile);
2696 }
2697 else {
2698 iy ++;
2699 FWRITE(&SYSMIS, sizeof(REAL), 1, outfile);
2700 }
2701 }
2702
2703 out_r(_("%i possible outliers deleted\n"), iout);
2704 /* ncol is the number, but counting starts at 0
2705 so we have to substract one the the right one*/
2706 out_r(_("Created new column %s having %i values!\n\n"), alias[ncol-1], iy);
2707 #endif // ndef STATIST_X
2708 out_end();
2709 }
2710
2711 /* =================================================================== */
2712
2713 char *center(char result[80], char *str, int width){
2714 int i, j, l;
2715 width += strlen(str) - stringLen(str);
2716 for(i = 0; i < 80; i++)
2717 result[i] = 0;
2718 l = strlen(str);
2719 i = (width - l) / 2;
2720 for(j = 0; j < i; j++)
2721 result[j] = ' ';
2722 for(i = 0; i < l; i++){
2723 result[j] = str[i];
2724 j++;
2725 }
2726 while(j < width){
2727 result[j] = ' ';
2728 j++;
2729 }
2730 return result;
2731 }
2732
2733 void freq_table(){
2734 typedef struct {
2735 char *c[6]; /* the value, the frequency, and the percentages */
2736 void *next;
2737 } FreqRow;
2738
2739 FreqRow *row = NULL, *first_row = NULL;
2740 char b[20];
2741 char h[256], l[256];
2742 PREAL y = xx[acol[0]];
2743 REAL x, p, vp, ap = 0.0, vap = 0.0; /* value, global %, valid %,
2744 cumulated %, valid cumulated % */
2745 int i, n, m = 0, w[6], r; /* r = 3 because of the table header */
2746 BOOLEAN truncated = FALSE;
2747 qsort(y, nn[acol[0]], sizeof(REAL), real_compar_up);
2748
2749 /* Setting default column widths */
2750 w[0] = stringLen(_("Value")) + 4;
2751 w[1] = stringLen(_("N")) + 2;
2752 w[2] = 8;
2753 w[3] = stringLen(_("Valid %")) + 2;
2754 w[4] = stringLen(_("Cum. %")) + 2;
2755 w[5] = stringLen(_("V. Cum. %")) + 2;
2756 if(w[3] < 8)
2757 w[3] = 8;
2758 if(w[4] < 8)
2759 w[4] = 8;
2760 if(w[5] < 8)
2761 w[5] = 8;
2762
2763 /* Counting frequencies */
2764 i = 0;
2765 do{
2766 x = y[i];
2767 n = 0;
2768 while(i < nn[acol[0]] && y[i] == x){
2769 n++;
2770 i++;
2771 }
2772 if(row == NULL){
2773 first_row = (FreqRow*)m_calloc(1, sizeof(FreqRow));
2774 row = first_row;
2775 } else{
2776 row->next = (FreqRow*)m_calloc(1, sizeof(FreqRow));
2777 row = row->next;
2778 }
2779 for(r = 2; r < 6; r++)
2780 row->c[r] = (char*)m_calloc(7, sizeof(char));
2781 sprintf(b, "%i", n);
2782 row->c[1] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
2783 strcpy(row->c[1], b);
2784 p = 100 * (REAL)n / (REAL)nn[acol[0]];
2785 sprintf(row->c[2], "%6.2f", p);
2786 ap += p;
2787 sprintf(row->c[4], "%6.2f", ap);
2788 if(x == SYSMIS){
2789 m = n;
2790 row->c[0] = (char*)m_calloc((strlen(_("Missing")) + 1), sizeof(char));
2791 strcpy(row->c[0], _("Missing"));
2792 strcpy(row->c[3], "--- ");
2793 strcpy(row->c[5], "--- ");
2794 } else{
2795 if(names[acol[0]]){
2796 for(r = 0; r < names[acol[0]]->n; r++){
2797 if(x == names[acol[0]]->v[r]){
2798 row->c[0] = names[acol[0]]->l[r];
2799 break;
2800 }
2801 }
2802 if(row->c[0] == NULL){
2803 sprintf(b, "%6g", x);
2804 row->c[0] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
2805 strcpy(row->c[0], b);
2806 }
2807 } else{
2808 sprintf(b, "%6g", x);
2809 row->c[0] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
2810 strcpy(row->c[0], b);
2811 }
2812 vp = 100 * (REAL)n / (REAL)(nn[acol[0]] - m);
2813 sprintf(row->c[3], "%6.2f", vp);
2814 vap += vp;
2815 sprintf(row->c[5], "%6.2f", vap);
2816 }
2817 } while(i < nn[acol[0]]);
2818 if(first_row == NULL){
2819 out_err(ERR, ERR_FILE, ERR_LINE, "first_row == NULL!");
2820 return;
2821 }
2822
2823 /* Calculating column widths for "value" and "N"*/
2824 row = first_row;
2825 while(row){
2826 for(i = 0; i < 2; i++)
2827 if((stringLen(row->c[i]) + 2) > w[i])
2828 w[i] = stringLen(row->c[i]) + 2;
2829 row = row->next;
2830 }
2831
2832 /* Printing header */
2833 memset(l, 0, 256);
2834 if(names[acol[0]] && names[acol[0]]->ctitle)
2835 strncpy(l, names[acol[0]]->ctitle, 255);
2836 else
2837 strncpy(l, alias[acol[0]], 255);
2838 colorize(ClHeader);
2839 out_r(_("Frequencies: %s\n"), l);
2840 colorize(ClDefault);
2841 strcpy(b, _("Value"));
2842 sprintf(h, " %s ", center(l, b, w[0]));
2843 strcpy(b, _("N"));
2844 strcat(h, center(l, b, w[1]));
2845 strcat(h, center(l, "%", w[2]));
2846 strcpy(b, _("Valid %"));
2847 strcat(h, center(l, b, w[3]));
2848 strcpy(b, _("Cum. %"));
2849 strcat(h, center(l, b, w[4]));
2850 strcpy(b, _("V. Cum. %"));
2851 strcat(h, center(l, b, w[5]));
2852 r = stringLen(h) + 1;
2853 for(i = 0; i < r; i++)
2854 l[i] = '=';
2855 l[i] = 0;
2856 out_r("%s\n", l);
2857 colorize(ClHeader);
2858 out_r("%s\n", h);
2859 colorize(ClDefault);
2860 for(i = 0; i < r; i++)
2861 l[i] = '-';
2862 l[i] = 0;
2863 out_r("%s\n", l);
2864 for(i = 0; i < r; i++)
2865 l[i] = '=';
2866 l[i] = 0;
2867
2868 /* Printing rows */
2869 row = first_row;
2870 r = 4; /* table title + table header = 4 lines */
2871 int diff;
2872 while(row){
2873 diff = strlen(row->c[0]) - stringLen(row->c[0]);
2874 if(names[acol[0]])
2875 sprintf(b, " %%-%is", w[0]+diff);
2876 else
2877 sprintf(b, " %%%is", w[0]+diff);
2878 out_r(b, row->c[0]);
2879 for(i = 1; i < 6; i++){
2880 diff = strlen(row->c[i]) - stringLen(row->c[i]);
2881 sprintf(b, "%%%is", w[i]+diff);
2882 out_r(b, row->c[i]);
2883 }
2884 out_r("\n");
2885 if ((r+1) % (SCRLINES - 1) == 0){
2886 mywait();
2887 if(!empty){
2888 truncated = TRUE;
2889 break;
2890 }
2891 }
2892 r++;
2893 row = row->next;
2894 }
2895
2896 if(truncated){
2897 out_r("\n");
2898 out_err(MWA, ERR_FILE, ERR_LINE,
2899 _(" --- TABLE TRUNCATED ---"));
2900 } else{
2901 out_r("%s\n", l);
2902 }
2903 }
2904
2905 typedef struct {
2906 REAL v; /* the same as xx[0][j] */
2907 int i; /* the index: j */
2908 } Mirror;
2909
2910 int mirror_compare_up(const void *a, const void *b) {
2911 if (((Mirror*)a)->v > ((Mirror*)b)->v){
2912 return 1;
2913 } else{
2914 if (((Mirror*)a)->v < ((Mirror*)b)->v){
2915 return -1;
2916 } else{
2917 return 0;
2918 }
2919 }
2920 }
2921
2922 int whatwidth_i(int i){
2923 char s[20];
2924 sprintf(s, "%i", i);
2925 return(strlen(s));
2926 }
2927
2928 int whatwidth_r(REAL r){
2929 char s[20];
2930 int i, l;
2931 if(r == SYSMIS)
2932 return 1;
2933 i = floor(r);
2934 sprintf(s, "%i", i);
2935 l = strlen(s);
2936 if(i >= 100)
2937 return(l + 3); /* 100.00 */
2938 else
2939 if(i <= -100)
2940 return(l + 4); /* -100.00 */
2941 else
2942 if(i >=0 && i < 100) /* 1.0000 */
2943 return(l + 5);
2944 else /* -1.0000 */
2945 return(l + 6);
2946 }
2947
2948 /* The calculus and the printing of the Table of Means are truncated if there
2949 * are more than MRESULT. I think that such a table is only meaniful when there
2950 * is a small number of classes of frequency, and I suppose that the user has
2951 * chosen this analysis by mistake in these cases */
2952 void compare_means(int c){
2953 typedef struct {
2954 PREAL r; /* the class of frequency, and the means */
2955 PREAL s; /* the standard deviation */
2956 int *n; /* the frequency, and n. of valid values used to calc. the mean */
2957 void *next;
2958 } MeanRow;
2959
2960 int i, j, k, p, n, nc, w;
2961 int *nw, *rw, *sw; /* lists of widths for columns */
2962 char b[80], *l, *L;
2963 MeanRow *row, *firstrow;
2964 BOOLEAN truncated = FALSE, vlabelfound = FALSE;
2965 Mirror *y = (Mirror*)m_calloc(nn[acol[0]], sizeof(Mirror));
2966 REAL *z = (PREAL)m_calloc(nn[acol[0]], sizeof(REAL));
2967
2968 for(i = 0; i < nn[acol[0]]; i++){
2969 y[i].v = xx[acol[0]][i];
2970 y[i].i = i;
2971 }
2972
2973 /* Sort "y" to calculate frequencies, as in freq_table(), but keep track of
2974 * original indexes to calculate means. */
2975 qsort(y, nn[acol[0]], sizeof(Mirror), mirror_compare_up);
2976
2977 /* Calculating means, and filling the table of means */
2978 j = 0;
2979 k = 0;
2980 nc = 0;
2981 row = (MeanRow*)m_calloc(1, sizeof(MeanRow));
2982 firstrow = row;
2983 do{
2984 row->r = (PREAL)m_calloc(c, sizeof(REAL));
2985 row->s = (PREAL)m_calloc(c, sizeof(REAL));
2986 row->n = (int*)m_calloc(c, sizeof(int));
2987 row->next = NULL;
2988 row->r[0] = y[j].v;
2989 n = 0;
2990 i = j;
2991 while(j < nn[acol[0]] && y[j].v == row->r[0]){ /* Counting frequency */
2992 n++;
2993 j++;
2994 }
2995 row->n[0] = n;
2996 for(p = 1; p < c; p++){ /* Filling "z[]" to calculate the mean */
2997 n = 0;
2998 for(k = i; k < j; k++){
2999 if(xx[acol[p]][y[k].i] != SYSMIS){
3000 z[n] = xx[acol[p]][y[k].i];
3001 n++;
3002 }
3003 }
3004 if(n > 0)
3005 row->r[p] = get_mean(z, n);
3006 else
3007 row->r[p] = 10.0; /* This value will not be printed, */
3008 if(n > 1) /* but will be used to calculate the */
3009 row->s[p] = get_sdv(z, n); /* column width: 0.0000 is broader */
3010 else /* than 10.00 */
3011 row->s[p] = 10.0;
3012 row->n[p] = n;
3013 }
3014 nc++;
3015 if(nc > MRESULT)
3016 break;
3017 if(j < nn[acol[0]]){
3018 row->next = (MeanRow*)m_calloc(1, sizeof(MeanRow));
3019 row = row->next;
3020 }
3021 } while(j < nn[acol[0]]);
3022
3023 /* Calculating the necessary widths for columns */
3024 rw = (int*)m_calloc(c, sizeof(int));
3025 sw = (int*)m_calloc(c, sizeof(int));
3026 nw = (int*)m_calloc(c, sizeof(int));
3027 for(p = 0; p < c; p++){
3028 if(p == 0){
3029 rw[p] = stringLen(_("value"));
3030 sw[p] = 0;
3031 } else{
3032 rw[p] = stringLen(_("mean"));
3033 sw[p] = stringLen(_("sdev"));
3034 }
3035 nw[p] = 2;
3036 row = firstrow;
3037 do{
3038 i = whatwidth_i(row->n[p]);
3039 if(i > nw[p])
3040 nw[p] = i;
3041 i = whatwidth_r(row->r[p]);
3042 if(i > rw[p])
3043 rw[p] = i;
3044 if(p > 0){
3045 i = whatwidth_r(row->s[p]);
3046 if(i > sw[p])
3047 sw[p] = i;
3048 }
3049 if(row->next)
3050 row = row->next;
3051 }while(row->next);
3052 }
3053 if(names[acol[0]]){
3054 k = rw[0];
3055 for(i = 0; i < names[acol[0]]->n; i++)
3056 if((2 + stringLen(names[acol[0]]->l[i])) > k)
3057 k = stringLen(names[acol[0]]->l[i]) + 2;
3058 rw[0] = k;
3059 }
3060
3061 /* calculating the total width of the table */
3062 w = 0;
3063 sw[0] = 0;
3064 for(p = 0; p < c; p++){
3065 if(p != 0){
3066 rw[p] += 2; /* width += blank space between columns */
3067 sw[p] += 2;
3068 }
3069 nw[p] += 2;
3070 w += rw[p] + sw[p] + nw[p];
3071 }
3072
3073 /* Printing the table of means */
3074 if(names[acol[0]] && names[acol[0]]->ctitle)
3075 strncpy(b, names[acol[0]]->ctitle, 79);
3076 else
3077 strncpy(b, alias[acol[0]], 79);
3078 colorize(ClHeader);
3079 out_r(_("Comparison of means: %s\n"), b);
3080 colorize(ClDefault);
3081 l = (char*)m_calloc((w+3), sizeof(char));
3082 L = (char*)m_calloc((w+3), sizeof(char));
3083 for(i = 0; i <= w; i++){
3084 l[i] = '-';
3085 L[i] = '=';
3086 }
3087 l[i] = '\n';
3088 L[i] = '\n';
3089 out_r(L);
3090 colorize(ClHeader);
3091 for(p = 0; p < c; p++){
3092 i = rw[p] + sw[p] + nw[p];
3093 out_r("%s", center(b, alias[acol[p]], i));
3094 }
3095 colorize(ClDefault);
3096 out_r("\n");
3097 for(p = 0; p < c; p++){
3098 strcpy(b, " ");
3099 for(i = 0; i < (rw[p] + sw[p] + nw[p] - 1); i++)
3100 strcat(b, "-");
3101 out_r(b);
3102 }
3103 out_r("\n");
3104 colorize(ClHeader);
3105 out_r("%s ", center(b, _("value"), rw[0]));
3106 out_r("%s", center(b, _("N"), nw[0]));
3107 for(p = 1; p < c; p++){
3108 out_r("%s", center(b, _("mean"), rw[p]));
3109 out_r("%s", center(b, _("sdev"), rw[p]));
3110 out_r("%s", center(b, _("N"), nw[p]));
3111 }
3112 colorize(ClDefault);
3113 out_r("\n%s", l);
3114
3115 i = 5; /* header = 5 lines */
3116 row = firstrow;
3117 int diff;
3118 while(row){
3119 for(p = 0; p < c; p++){
3120 if(p == 0){
3121 if(names[acol[0]] && names[acol[0]]->n > 0){ /* print value label */
3122 if(row->r[p] == SYSMIS){
3123 sprintf(b, " %%-%is%%%ii", rw[p], nw[p]);
3124 out_r(b, NODATA, row->n[p]);
3125 } else{
3126 for(k = 0; k < names[acol[0]]->n; k++){
3127 if(row->r[p] == names[acol[0]]->v[k]){
3128 diff = strlen(names[acol[0]]->l[k]) - stringLen(names[acol[0]]->l[k]);
3129 sprintf(b, " %%-%is%%%ii", rw[p]+diff, nw[p]);
3130 out_r(b, names[acol[0]]->l[k], row->n[p]);
3131 vlabelfound = TRUE;
3132 break;
3133 }
3134 }
3135 if(vlabelfound)
3136 vlabelfound = FALSE;
3137 else{
3138 sprintf(b, " %%-%i.%if%%%ii", rw[p], 4, nw[p]);
3139 out_r(b, row->r[p], row->n[p]);
3140 }
3141 }
3142 } else{ /* no label for values: print value */
3143 if(row->r[p] == SYSMIS){
3144 sprintf(b, "%%%is%%%ii", rw[p], nw[p]);
3145 out_r(b, NODATA, row->n[p]);
3146 } else{
3147 sprintf(b, "%%%i.%if%%%ii", rw[p], 4, nw[p]);
3148 out_r(b, row->r[p], row->n[p]);
3149 }
3150 }
3151 } else{
3152 if(row->n[p] > 1){
3153 if((row->r[p] >= 100.0 || row->r[p] <= -100.0) &&
3154 (row->s[p] >= 100.0 || row->s[p] <= -100.0))
3155 sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 2, sw[p], 2, nw[p]);
3156 else if((row->r[p] >= 100.0 || row->r[p] <= -100.0) &&
3157 (row->s[p] < 100.0 && row->s[p] > -100.0))
3158 sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 2, sw[p], 4, nw[p]);
3159 else if((row->r[p] < 100.0 && row->r[p] > -100.0) &&
3160 (row->s[p] >= 100.0 || row->s[p] <= -100.0))
3161 sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 4, sw[p], 2, nw[p]);
3162 else
3163 sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 4, sw[p], 4, nw[p]);
3164 out_r(b, row->r[p], row->s[p], row->n[p]);
3165 } else{
3166 if(row->n[p] == 1){
3167 if(row->r[p] >= 100.0 || row->r[p] <= -100.0)
3168 sprintf(b, "%%%i.%if%%%is%%%ii", rw[p], 2, sw[p], nw[p]);
3169 else
3170 sprintf(b, "%%%i.%if%%%is%%%ii", rw[p], 4, sw[p], nw[p]);
3171 out_r(b, row->r[p], "--", row->n[p]);
3172 } else{
3173 sprintf(b, "%%%is%%%is%%%ii", rw[p], sw[p], nw[p]);
3174 out_r(b, "--", "--", 0);
3175 }
3176 }
3177 }
3178 }
3179 out_r("\n");
3180 i++;
3181 if ((i+1) % (SCRLINES - 1) == 0){
3182 mywait();
3183 if(!empty){
3184 truncated = TRUE;
3185 break;
3186 }
3187 }
3188 row = row->next;
3189 }
3190 if(truncated){
3191 out_r("\n");
3192 out_err(MWA, ERR_FILE, ERR_LINE,
3193 _(" --- TABLE TRUNCATED ---"));
3194 } else
3195 if(nc > MRESULT){
3196 out_r("\n");
3197 out_err(MWA, ERR_FILE, ERR_LINE,
3198 _(" --- TABLE TRUNCATED ---"));
3199 } else
3200 out_r("%s\n", L);
3201 }
3202
3203 /* =================================================================== */
3204