"Fossies" - the Fresh Open Source Software Archive

Member "statist-1.4.2/src/procs.c" (15 Nov 2006, 83062 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "procs.c" see the Fossies "Dox" file reference documentation.

    1 /* This file is part of statist
    2 **
    3 ** It is distributed under the GNU General Public License.
    4 ** See the file COPYING for details.
    5 **
    6 ** (c) 1997 Dirk Melcher
    7 **  Doerper Damm 4
    8 **  49134 Wallenhorst
    9 **  GERMANY
   10 **  Tel. 05407/7636
   11 **  email: Dirk.Melcher@usf.Uni-Osnabrueck.DE
   12 **
   13 **  small Changes by Bernhard Reiter  http://www.usf.Uni-Osnabrueck.DE/~breiter
   14 ** $Id: procs.c,v 1.35 2006/11/15 17:17:34 jakson Exp $
   15 **
   16 **
   17 ** further changes: Andreas Beyer, 1999, abeyer@usf.uni-osnabrueck.de
   18 ***************************************************************/
   19 
   20 /* procs.c for STATIST */
   21 
   22 #include <stdio.h>
   23 #include <math.h>
   24 #include <string.h>
   25 #include <stdlib.h>
   26 #include <limits.h>
   27 #include <float.h>
   28 
   29 #include "statist.h"
   30 #include "funcs.h"
   31 #include "plot.h"
   32 #include "procs.h"
   33 #include "data.h"
   34 #include "gettext.h"
   35 
   36 
   37 /* =================================================================== */
   38 
   39 
   40 void part_corr(PREAL xx[], int nrow, int ncol) {
   41   PREAL r[6], x, y;
   42   REAL  x_sdv, y_sdv, s_xy;
   43   int   i, k, df;
   44   REAL r12s3, r12s4, r53s4, r25s4, r15s4, r12s5, r13s5,
   45         r23s5, r12s34, r15s34, r25s34, r14s5, r24s5, r12s35, r12s45,
   46         r12s345, r13s4, r23s4, t, alpha, a, r14s3, r13s2;
   47 /*
   48    calculate partial correlation;
   49    at most 3 constant correlations
   50 */
   51 
   52   for (i=0; i<ncol; i++) {
   53     r[i+1] = (REAL*)m_calloc((ncol+1), sizeof(REAL));
   54     for (k=0; k<ncol; k++) {
   55       x = xx[i];
   56       y = xx[k];
   57 
   58       s_xy = (get_xysum(x,y,nrow) - ((get_sum(x,nrow)*get_sum(y,nrow))/(REAL) nrow))/
   59              (REAL) (nrow-1);
   60       x_sdv = get_sdv(x,nrow);
   61       y_sdv = get_sdv(y,nrow);
   62       DIV(r[i+1][k+1], s_xy, (x_sdv*y_sdv));
   63     }
   64   }
   65 
   66   DIV(r12s3, (r[1][2]-r[1][3]*r[2][3]), sqrt((1.-SQR(r[1][3]))*(1.-SQR(r[2][3]))));
   67   DIV(r13s2, (r[1][3]-r[1][2]*r[3][2]),sqrt((1.-SQR(r[1][2]))*(1.-SQR(r[3][2]))));
   68 
   69   out_start();
   70   colorize(ClHeader);
   71   out_r(_("\nResult partial correlation:\n") );
   72   colorize(ClDefault);
   73   out_r(" r12   = %f\n\n", r[1][2]);
   74 
   75   out_r(_("One constant correlation:") );
   76   out_r("\n r12_3 = %f  diff= %f\n", r12s3, (r[1][2]-r12s3));
   77   out_r(" r13_2 = %f  diff= %f\n", r13s2, (r[1][3]-r13s2));
   78 
   79   if (ncol >= 4) {
   80     DIV(r12s4, (r[1][2]-r[1][4]*r[2][4]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[2][4]))));
   81     DIV(r13s4, (r[1][3]-r[1][4]*r[3][4]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[3][4]))));
   82     DIV(r23s4, (r[2][3]-r[2][4]*r[3][4]), sqrt((1.-SQR(r[2][4]))*(1.-SQR(r[3][4]))));
   83 
   84     DIV(r14s3, (r[1][4]-r[1][3]*r[4][3]), sqrt((1.-SQR(r[1][3]))*(1.-SQR(r[4][3]))));
   85 
   86     DIV(r12s34, (r12s4-r13s4*r23s4), sqrt((1.-SQR(r13s4))*(1.-SQR(r23s4))));
   87 
   88     out_r(" r12_4 = %f  diff= %f\n", r12s4, r[1][2]-r12s4);
   89     out_r(" r13_4 = %f  diff= %f\n", r13s4, r[1][3]-r13s4);
   90 
   91     out_r(" r14_3 = %f  diff= %f\n\n", r14s3, r[1][4]-r14s3);
   92     out_r(_("Two constant correlations:"));
   93     out_r("\n r12_34 = %f  diff= %f\n", r12s34, r[1][2]-r12s34);
   94 
   95     if (ncol == 5) {
   96       DIV(r53s4, (r[5][3]-r[5][4]*r[3][4]), sqrt((1.-SQR(r[5][4]))*(1.-SQR(r[3][4]))));
   97       DIV(r25s4, (r[2][5]-r[2][4]*r[4][5]), sqrt((1.-SQR(r[2][4]))*(1.-SQR(r[4][5]))));
   98       DIV(r15s4, (r[1][5]-r[1][4]*r[4][5]), sqrt((1.-SQR(r[1][4]))*(1.-SQR(r[4][5]))));
   99       DIV(r12s5, (r[1][2]-r[1][5]*r[2][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[2][5]))));
  100       DIV(r13s5, (r[1][3]-r[1][5]*r[3][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[3][5]))));
  101       DIV(r23s5, (r[2][3]-r[2][5]*r[3][5]), sqrt((1.-SQR(r[2][5]))*(1.-SQR(r[3][5]))));
  102 
  103       DIV(r15s34, (r15s4-r13s4*r53s4), sqrt((1.-SQR(r13s4))*(1.-SQR(r53s4))));
  104       DIV(r25s34, (r25s4-r23s4*r53s4), sqrt((1.-SQR(r23s4))*(1.-SQR(r53s4))));
  105       DIV(r14s5, (r[1][4]-r[1][5]*r[4][5]), sqrt((1.-SQR(r[1][5]))*(1.-SQR(r[4][5]))));
  106       DIV(r24s5, (r[2][4]-r[2][5]*r[4][5]), sqrt((1.-SQR(r[2][5]))*(1.-SQR(r[4][5]))));
  107       DIV(r12s35, (r12s5-r13s5*r23s5), sqrt((1.-SQR(r13s5))*(1.-SQR(r23s5))));
  108       DIV(r12s45, (r12s5-r14s5*r24s5), sqrt((1.-SQR(r14s5))*(1.-SQR(r24s5))));
  109 
  110       DIV(r12s345, (r12s34-r15s34*r25s34), sqrt((1.-SQR(r15s34))*(1.-SQR(r25s34))));
  111 
  112       out_r(" r12_5 = %f  diff= %f\n", r12s5, r[1][2]-r12s5);
  113       out_r(" r12_35 = %f  diff= %f\n", r12s35, r[1][2]-r12s35);
  114       out_r(" r12_45 = %f  diff= %f\n\n", r12s45, r[1][2]-r12s45);
  115       out_r(_("Three constant correlations:") );
  116       out_r("\n r12_345 = %f  diff= %f\n", r12s345, r[1][2]-r12s345);
  117     }
  118   }
  119 
  120    df = nrow - 3;
  121    t = r12s3/(sqrt(1.-SQR(r12s3))) * sqrt((REAL)df);
  122    /* f = SQR(r12s3)/(1.-SQR(r12s3)) * (REAL)(nrow-2); */
  123    alpha = get_t_int(fabs(t), df);
  124 
  125    out_r( _("\nt-Test:\n"));
  126    out_r(_("Hypothesis H0: r12_3 = 0  against hypothesis H1: r12_3 # 0  "
  127      "(->two sided)\n") );
  128    a = 1.0-alpha;
  129    out_r(_("Probability of H0 = %6.4f\n\n"), a);
  130 /*
  131    out_r("F-Test:\n");
  132    alpha = get_f_int(f, 1, df);
  133    a = alpha;
  134    out_r("Hypothese H0: r=0  gegen  Hypothese H1: r#0  (->zweiseitig)\n");
  135    out_r("Irrtumswahrscheinlichkeit alpha fuer H0 = %6.4f\n\n", a);
  136 */
  137   out_end();
  138 
  139 }
  140 
  141 /* =================================================================== */
  142 
  143 
  144 void multiple_reg(REAL y[], PREAL x[], int nrow, int ncol) {
  145    REAL *b, *resi, sdv, rquad, alpha, f_calc, reg, theo;
  146    int i, j, rows;
  147    FILE *rfile;
  148    BOOLEAN hasmiss = FALSE;
  149 
  150    b = (REAL*)m_calloc(ncol+1, sizeof(REAL));
  151    if ((rquad=get_multiple_reg(y, x, nrow, ncol, b, &sdv, &f_calc))==REAL_MIN) {
  152      return;
  153    }
  154 
  155    out_start();
  156    colorize(ClHeader);
  157    out_r(_("\nResults multiple linear regression:\n") );
  158    colorize(ClDefault);
  159    out_r(_("Regressed equation: y = B[0] + B[1]*x[1] + B[2]*x[2] +...+") );
  160    out_r(" B[n]*x[n] \n\n");
  161 
  162    for (i=0; i<(ncol+1); i++) {
  163      out_r("B[%i] = %f\n", i, b[i]);
  164    }
  165    out_r("\n");
  166 
  167 
  168    if (!noplot) {
  169      if (ncol==2) {
  170        plot_tripl(x[0], x[1], y, nrow, b[0], b[1], b[2],
  171        get_label(x[0]), get_label(x[1]), get_label(y));
  172      }
  173 
  174      if (ncol==1) {
  175        plot_pair(x[0], y, nrow, b[0], b[1], get_label(x[0]), get_label(y));
  176      }
  177    }
  178 
  179    /* If there is any missing value, the residues will be in the wrong rows.
  180     * To avoid this, we need to realloc cols before calculating "resi" */
  181    rows= nrow;
  182    for(j = 0; j < ncol; j++)
  183      if(nn[acol[j]] != vn[acol[j]]){
  184        alloc_cols((ncol + 1), 3);
  185        rows = nn[acol[0]];
  186        break;
  187      }
  188 
  189    resi = (REAL*)m_calloc(rows, sizeof(REAL));
  190 
  191    for(j = 0; j < rows; j++){
  192      theo = b[0];
  193      for(i = 1; i <= ncol; i++){
  194        if(xx[acol[i]][j] == SYSMIS){
  195      hasmiss = TRUE;
  196      break;
  197        }
  198        theo += b[i] * xx[acol[i]][j];
  199      }
  200      if(hasmiss){
  201        resi[j] = SYSMIS;
  202        hasmiss = FALSE;
  203      } else
  204        resi[j] = xx[acol[0]][j] - theo;
  205    }
  206 
  207    if ((j=col_exist("resi", FALSE)) == -1) {
  208      if (!(make_new_col("resi", rows))) {
  209        out_end();
  210        return;
  211      }
  212    } else{
  213 #ifndef STATIST_X
  214      out_r(_("Column 'resi' updated!\n") );
  215 #endif
  216    }
  217    if ((j=col_exist("resi", FALSE)) != -1) {
  218      rfile = tmpptr[j];
  219    } else{
  220      return;
  221    }
  222    rewind(rfile);
  223    FWRITE(resi, sizeof(REAL), rows, rfile);
  224    rewind(rfile);
  225 
  226    SQRT(reg, rquad);
  227    out_r(_("Coefficient of determination r^2 = %f\n"), rquad);
  228    out_r(_("Correlation coefficient r = %f\n"), reg);
  229    out_r(_("Standard deviation s = %f\n"), sdv);
  230    out_r(_("Number of data points n = %i\n"), nrow);
  231 
  232    if (!equal(1.0, reg)) {
  233      out_r(_("\nF-value = %f\n"), f_calc);
  234      out_r(_("Degree of freedom f1 = %i\n"), ncol);
  235      out_r(_("Degree of freedom f2 = %i\n"), nrow-ncol-1);
  236 
  237      out_r(_("\nF-Test:\n"));
  238      out_r(_("Hypothesis H0: r=0  against hypothesis H1: r#0 \n") );
  239      alpha = get_f_int(f_calc, ncol, nrow-ncol-1);
  240      if (reg < 0.0) {
  241        alpha = 1.-alpha;
  242      }
  243      out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
  244    }
  245    else {
  246      out_r(_("F-Test not possible since r = 1\n\n") );
  247    }
  248    out_end();
  249 }
  250 
  251 /* =================================================================== */
  252 
  253 
  254 
  255 void cross_validate(REAL y[], PREAL x[], int nrow, int nc) {
  256   char *pred_label;
  257   REAL rquad_ori_pred, qquad, rquad, sdv, f_calc;
  258   REAL *b, *ypred, *ypredm;
  259   char analias[80];
  260   int i, j, k;
  261   BOOLEAN hasmiss = FALSE;
  262 
  263 
  264   b = (REAL*) m_calloc(nc+1, sizeof(REAL));
  265   ypred = (REAL*) m_calloc(nrow, sizeof(REAL));
  266   if ((qquad=get_cross_validate(y, x, nrow, nc, ypred))==REAL_MIN) {
  267     return;
  268   }
  269   if ((rquad=get_multiple_reg(y,x,nrow,nc,b,&sdv,&f_calc))==REAL_MIN) {
  270     return;
  271   }
  272   if ((rquad_ori_pred=get_multiple_reg(ypred,&y,nrow,1,b,&sdv,&f_calc))==REAL_MIN) {
  273     return;
  274   }
  275 
  276   if (!noplot) {
  277     pred_label = m_calloc(1,strlen(get_label(y))+strlen(" vorhergesagt")+1);
  278     strcpy(pred_label, get_label(y));
  279     strcat(pred_label, _(" predicted") );
  280     plot_pair(y, ypred, nrow, b[0], b[1], get_label(y), pred_label);
  281   }
  282 
  283   out_start();
  284   out_r(_("Coefficient of determination r^2 = %f\n"), rquad);
  285   out_r(_("r^2 of regression y versus y_pred = %f\n"), rquad_ori_pred);
  286   out_r(_("Variance of prediction q^2 = %f\n"), qquad);
  287 
  288   /* If we have missing values, the ypred values might be in the wrong rows.
  289    * In this case, we need to insert the missing values in "ypred". */
  290   ypredm = ypred;
  291   for(i = 0; i <= nc; i++)
  292     if(nn[acol[i]] != vn[acol[i]]){
  293       hasmiss = TRUE;
  294       nrow = nn[acol[0]];
  295       nc++;
  296       alloc_cols(nc, 3);
  297       break;
  298     }
  299   if(hasmiss){
  300     hasmiss = FALSE;
  301     k = 0;
  302     ypredm = (REAL*) m_calloc(nrow, sizeof(REAL));
  303     for(i = 0; i < nrow; i++){
  304       for(j = 0; j < nc; j++)
  305     if(xx[acol[j]][i] == SYSMIS){
  306       hasmiss = TRUE;
  307       break;
  308     }
  309       if(hasmiss){
  310     ypredm[i] = SYSMIS;
  311     hasmiss = FALSE;
  312       }
  313       else{
  314     ypredm[i] = ypred[k];
  315     k++;
  316       }
  317     }
  318   }
  319   
  320   /* create new data record: */
  321   strcpy(analias, "pred_");
  322   strncat(analias, get_name(xx[acol[0]]), 79-strlen(analias));
  323   if (!(make_new_col(analias, nrow))) {
  324     out_end();
  325     return;
  326   }
  327   
  328   FWRITE(ypredm, sizeof(REAL), nrow, tmpptr[ncol - 1]);
  329   out_end();
  330 }
  331 
  332 /* ================================================================== */
  333 
  334 
  335 void random_tupel(REAL y[], PREAL x[], int nrow, int nc, int ntup) {
  336   int  i, k, j;
  337   BOOLEAN found;
  338   TUPEL *tupel_list, atupel;
  339   REAL rquad, sdv, f_calc;
  340   REAL *yperm, *b, *ypred, qquad;
  341   FILE *fp1, *fp2;
  342 
  343   yperm = (REAL*) m_calloc(nrow, sizeof(REAL));
  344   ypred = (REAL*) m_calloc(nrow, sizeof(REAL));
  345   atupel.a = (unsigned short int*) m_calloc(nrow, sizeof(unsigned short int));
  346   atupel.n = nrow;
  347   tupel_list = (TUPEL*) m_calloc(ntup, sizeof(TUPEL));
  348   b = (REAL*)m_calloc(nc+1, sizeof(REAL));
  349 
  350   if ((rquad=get_multiple_reg(y, x, nrow, nc, b, &sdv, &f_calc))==REAL_MIN) {
  351     return;
  352   };
  353   if ((qquad=get_cross_validate(y, x, nrow, nc, ypred))==REAL_MIN) {
  354     return;
  355   }
  356   out_start();
  357   out_r(_("\nOriginal y-Vector: r^%6.4f  q^2=%6.4f\n\n"), rquad, qquad);
  358 
  359   if(!(make_new_col("rquad", ntup))){
  360     out_end();
  361     return;
  362   }
  363   fp1 = tmpptr[ncol - 1];
  364   if(!(make_new_col("qquad", ntup))){
  365     out_end();
  366     return;
  367   }
  368   fp2 = tmpptr[ncol - 1];   
  369 
  370   out_d(_("Starting with randomization of y-vector. Please be patient ...\n"));
  371   i=0;
  372   do {
  373     create_tupel(&atupel, nrow);
  374     /* print_tupel(atupel); */
  375     found = FALSE;
  376     for (k=0; k<i; k++) {
  377       if (equal_tupel(atupel, tupel_list[k])) {
  378     found = TRUE;
  379     break;
  380       }
  381     }
  382 
  383     if (!found) {
  384       copy_tupel(&tupel_list[i], &atupel);
  385       i++;
  386     }
  387     if (i%100==0) {
  388       out_d(".");
  389       fflush(stdout);
  390     }
  391   } while (i<ntup);
  392   out_d("\n");
  393 
  394   out_d(_("Calculating q^2 and r^2 of randomized vectors ...") );
  395 
  396 #define _FINISH_RANDOM_TUPLE    \
  397   delete_last_columns(2);   \
  398   out_end();
  399     
  400     
  401   for (i=0; i<ntup; i++) {
  402     if (i%100==0) {
  403       out_d(".");
  404       fflush(stdout);
  405     }
  406     for (k=0; k<nrow; k++) {
  407       j = tupel_list[i].a[k];
  408       yperm[k] = y[j];
  409     }
  410     if ((rquad=get_multiple_reg(yperm, x, nrow, nc, b, &sdv, &f_calc))==REAL_MIN) {
  411     _FINISH_RANDOM_TUPLE
  412     return;
  413     }
  414     if ((qquad=get_cross_validate(yperm, x, nrow, nc, ypred))==REAL_MIN) {
  415     _FINISH_RANDOM_TUPLE
  416     return;
  417     }
  418     FWRITE(&rquad, sizeof(REAL), 1, fp1);
  419     FWRITE(&qquad, sizeof(REAL), 1, fp2);
  420   }
  421   out_d(_("\ndone!\n\n") );
  422 
  423 #undef _FINISH_RANDOM_TUPLE
  424 }
  425 
  426 /* ================================================================= */
  427 
  428 void poly_reg(REAL x[], REAL y[], int n, int m) {
  429 
  430    REAL a[2*MPOLY+1], e[MPOLY+2], q[MPOLY+1][MPOLY+2];
  431 
  432    REAL  br, cr, sr, tr, jor, fr, kr, pr, alpha;
  433    int   i, jo, t, s, f1, f2;
  434 
  435    for (i=1; i<(2*m+1); i++) {
  436      a[i] = 0.0;
  437    }
  438 
  439    for (i=0; i<(m+2); i++) {
  440      e[i] = 0.0;
  441    }
  442 
  443    a[0] = (REAL) n;
  444 
  445    for (i=0; i<n; i++) {
  446      pr = 1.;
  447      for (jo=1; jo<(2*m+1); jo++) {
  448        pr *= x[i];
  449        a[jo] += pr;
  450      }
  451      pr = y[i];
  452      for (jo=0; jo<(m+1); jo++) {
  453        e[jo] += pr;
  454        pr *= x[i];
  455        q[jo][m+1] = e[jo];
  456      }
  457      e[m+1] += SQR(y[i]);
  458    }
  459 
  460    for (i=0; i<(m+1); i++) {
  461      for (jo=0; jo<(m+1); jo++) {
  462        q[i][jo] = a[i+jo];
  463      }
  464    }
  465 
  466 /* Begin of Gauss-Elimination */
  467    for (s=0; s<(m+1); s++) {
  468      t = s;
  469      label1: ;
  470      if (q[t][s] != 0.0) {
  471        goto label2;
  472      }
  473      t ++;
  474      if (t < m) {
  475        goto label1;
  476      }
  477      out_err(MAT, ERR_FILE, ERR_LINE,
  478      _("Gauss-Elimination: No possible solution!") );
  479      return;
  480      label2: ;
  481      for (jo=0; jo<(m+2); jo++) {
  482        br = q[s][jo];
  483        q[s][jo] = q[t][jo];
  484        q[t][jo] = br;
  485      }
  486      cr = 1./q[s][s];
  487      for (jo=0; jo<(m+2); jo++) {
  488        q[s][jo] *= cr ;
  489      }
  490      for (t=0; t<(m+1); t++) {
  491        if (t == s) {
  492          goto label3;
  493        }
  494        cr = -q[t][s];
  495        for (jo=0; jo<(m+2); jo++) {
  496          q[t][jo] += (cr * q[s][jo]);
  497        }
  498      label3: ;
  499      }
  500    }
  501 /* End of Gauss-Elimination */
  502 
  503    sr = 0.0;
  504    for (i=1; i<(m+1); i++) {
  505      sr += q[i][m+1] * (e[i]-a[i]*e[0]/(REAL)n);
  506    }
  507 
  508    tr = e[m+1] - e[0]*e[0]/(REAL)n;
  509    cr = tr-sr;
  510    f2 = n-m-1;
  511    DIV(jor, sr, (REAL)m);
  512    DIV(kr, cr, (REAL)f2);
  513    f1 = m;
  514    DIV(fr, jor, kr);
  515    DIV(br, sr, tr);
  516    SQRT(kr, br);
  517    DIV(sr, cr, (REAL)f2);
  518    SQRT(sr, sr);
  519 
  520    out_start();
  521    for (i=0; i<(m+1); i++) {
  522      a[i] = q[i][m+1];
  523      out_r(_("Coefficient a%i = %15e\n"), i, a[i]);
  524 
  525    }
  526 
  527    colorize(ClHeader);
  528    out_r(_("\nResult polynomial regression:\n") );
  529    colorize(ClDefault);
  530    out_r(_("Regressed equation: y = a0 + a1*x + a2*x^2 +...+ an*x^n\n\n") );
  531 
  532    if (!noplot)
  533      plot_poly(x, y, n, a, m, get_label(x), get_label(y));
  534 
  535    out_r(_("Coefficient of determination r^2 = %f\n"), br);
  536    out_r(_("Correlation coefficient r = %f\n"), kr);
  537    out_r(_("Standard deviation s = %f\n"), sr);
  538 
  539    if (!equal(1.0, kr)) {
  540      out_r(_("F-value = %f\n"), fr);
  541      out_r(_("Degree of freedom f1 = %i\n"), f1);
  542      out_r(_("Degree of freedom f2 = %i\n"), f2);
  543 
  544      out_r(_("\nF-Test:\n"));
  545      out_r(_("Hypothesis H0:  r=0  against hypothesis H1: r>0 or r<0\n") );
  546      alpha = get_f_int(fr, f1, f2);
  547      if (kr < 0.0) {
  548        alpha = 1.-alpha;
  549      }
  550      out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
  551    }
  552    else {
  553      out_r(_("F-Test not possible since r = 1\n\n") );
  554    }
  555    out_end();
  556 }
  557 
  558 /* =================================================================== */
  559 
  560 
  561 void correl_matrix(PREAL xx[], int nrow, int ncol) {
  562    int i, k;
  563    REAL *x, *y, x_sdv, y_sdv, s_xy;
  564    /*   PREAL *r;    */
  565    PREAL r[MCOL];
  566    char label[10];
  567 
  568    /* r = (PREAL*)m_calloc(ncol, sizeof(PREAL)); */
  569    for (i=0; i<ncol; i++) {
  570      r[i] = (REAL*) m_calloc(ncol, sizeof(REAL));
  571    }
  572 
  573    for (i=0; i<ncol; i++) {
  574      r[i][i] = 1.0;
  575      for (k=0; k<i; k++) {
  576        x = xx[i];
  577        y = xx[k];
  578        s_xy = (get_xysum(x,y,nrow) - ((get_sum(x,nrow)*get_sum(y,nrow))/
  579               (REAL) nrow))/ (REAL) (nrow-1);
  580        x_sdv = get_sdv(x,nrow);
  581        y_sdv = get_sdv(y,nrow);
  582        DIV(r[i][k], s_xy, (x_sdv*y_sdv));
  583        r[k][i] = r[i][k];
  584      }
  585    }
  586 
  587    out_start();
  588    for(i = 0; i < 10; i++)
  589      label[i] = 0;
  590    colorize(ClHeader);
  591    out_r(_("Matrix of linear correlation of variables:\n\n") );
  592    out_r(_("Variable     ") );
  593    for (i=0; i<ncol; i++) {
  594      /*  out_r("%8i", (i+1)); */
  595      strncpy(label, get_name(xx[i]), 9);
  596      if (strlen(label)>6) {
  597        label[6] = '.';
  598        label[7] = '\0';
  599      }
  600      out_r("%-8s", label);
  601    }
  602    colorize(ClDefault);
  603    out_r("\n");
  604    for (i=0; i<ncol; i++) {
  605      out_r("--------");
  606    }
  607    out_r("------------\n");
  608 
  609    for (i=0; i<ncol; i++) {
  610      /*  out_r("      %2i |  ", (i+1)); */
  611      strncpy(label, get_name(xx[i]), 9);
  612      if (strlen(label)>6) {
  613        label[6] = '.';
  614        label[7] = '\0';
  615      }
  616      colorize(ClHeader);
  617      out_r(" %-7s", label);
  618      colorize(ClDefault);
  619      out_r(" | ");
  620      for (k=0; k<ncol; k++) {
  621        out_r("%8.4f", r[i][k]);
  622      }
  623      out_r("\n");
  624    }
  625    out_r("\n");
  626    out_end();
  627 }
  628 
  629 
  630 /* =================================================================== */
  631 
  632 void rank_matrix(PREAL xx[], int nrow, int ncol) {
  633    int i, k;
  634    REAL *x, *y;
  635    /*   PREAL *r; */
  636    PREAL r[MCOL];
  637 
  638    char label[64];
  639 
  640    /* r = (PREAL*)m_calloc(ncol, sizeof(PREAL)); */
  641    for (i=0; i<ncol; i++) {
  642      r[i] = (REAL*) m_calloc(ncol, sizeof(REAL));
  643    }
  644 
  645    for (i=0; i<ncol; i++) {
  646      r[i][i] = 1.0;
  647      for (k=0; k<i; k++) {
  648        x = xx[i];
  649        y = xx[k];
  650        if ((r[i][k]=get_rank_correlation(x, y, nrow))==REAL_MIN) {
  651      return;
  652        }
  653        r[k][i] = r[i][k];
  654      }
  655    }
  656 
  657    out_start();
  658    colorize(ClHeader);
  659    out_r(_("Matrix of SPEARMAN'S Rank-Correlation-Coefficients\n") );
  660    out_r(_("of the variables:\n\n") );
  661    out_r(_("Variable     ") );
  662    for (i=0; i<ncol; i++) {
  663      /*  out_r("%8i", (i+1)); */
  664      strncpy(label, get_name(xx[i]), 9);
  665      if (strlen(label)>6) {
  666        label[6] = '.';
  667        label[7] = '\0';
  668      }
  669      out_r("%-8s", label);
  670    }
  671    colorize(ClDefault);
  672    out_r("\n");
  673    for (i=0; i<ncol; i++) {
  674      out_r("--------");
  675    }
  676    out_r("------------\n");
  677 
  678    for (i=0; i<ncol; i++) {
  679      /*  out_r("      %2i |  ", (i+1)); */
  680      strncpy(label, get_name(xx[i]), 9);
  681      if (strlen(label)>6) {
  682        label[6] = '.';
  683        label[7] = '\0';
  684      }
  685      colorize(ClHeader);
  686      out_r(" %-7s", label);
  687      colorize(ClDefault);
  688      out_r(" | ");
  689      for (k=0; k<ncol; k++) {
  690        out_r("%8.4f", r[i][k]);
  691      }
  692      out_r("\n");
  693    }
  694    out_r("\n");
  695   out_end();
  696 }
  697 
  698 
  699 /*    out_r("Variable "); */
  700 /*    for (i=0; i<ncol; i++) { */
  701 /*      out_r("%8i", (i+1)); */
  702 /*    } */
  703 /*    out_r("\n"); */
  704 /*    for (i=0; i<ncol; i++) { */
  705 /*      out_r("--------"); */
  706 /*    } */
  707 /*    out_r("------------\n"); */
  708 
  709 /*    for (i=0; i<ncol; i++) { */
  710 /*      out_r("      %2i |  ", (i+1)); */
  711 /*      for (k=0; k<ncol; k++) { */
  712 /*        out_r("%8.4f", r[i][k]); */
  713 /*      } */
  714 /*      out_r("\n"); */
  715 /*    } */
  716 /*    out_r("\n"); */
  717 /*  } */
  718 
  719 
  720 /* =================================================================== */
  721 
  722 void probit(REAL dose[], REAL num[], REAL effect[], int n) {
  723   REAL propcalc, chisq, ycalc;
  724   REAL *aprobit, *logx, neq, yh, w, oc;
  725   int i, np, offset, ind, old, nerr = 0;
  726   BOOLEAN positiv, equal_num, equal_dose, *infinity, show_errors = TRUE;
  727   REAL x_mean, x_sdv, y_mean, y_sdv;
  728   REAL s_xy, r_xy, a1, a0, ed50, ed90, alpha, tau, arg;
  729   REAL  nw_sum, nwx_sum, nwxsqr_sum, s_m, conf_l, conf_u, mq_m;
  730 
  731   REAL dose0, dose1;
  732 
  733   aprobit = (REAL*)m_calloc(n, sizeof(REAL));
  734   logx = (REAL*)m_calloc(n, sizeof(REAL));
  735   infinity = (BOOLEAN*)m_calloc(n, sizeof(BOOLEAN));
  736 
  737   np = 0;
  738 
  739   out_start();
  740   for (i=0; i<n; i++) {
  741     yh = effect[i]/num[i];
  742     if (yh < 0.50) {
  743       positiv = TRUE;
  744       yh = 1. - yh;
  745     }
  746     else {
  747       positiv = FALSE;
  748     }
  749 
  750     if (yh >= 1.0) {
  751       infinity[i] = TRUE;
  752       nerr++;
  753       if(nerr == 30){
  754     out_d("\n\n");
  755     out_i(_("Continue showing these math warnings? (%s) "), _("y/N"));
  756     GETNLINE;
  757     if(!(empty) && (line[0] != _("y")[0] || line[0] != _("Y")[0])){
  758       show_errors = FALSE;
  759     }
  760       }
  761       if(show_errors){
  762     out_err(MWA, ERR_FILE, ERR_LINE,
  763         _("Can not calculate probit: dose %g count %g effect %g"),
  764         dose[i], num[i], effect[i]);
  765       }
  766     }
  767     else {
  768       infinity[i] = FALSE;
  769 
  770       neq = 0.;
  771       if (!equal(yh, 0.5)) {
  772           neq = get_z(yh);
  773       }
  774 
  775       if (positiv) {
  776         neq = -1. * neq;
  777       }
  778       aprobit[np] = neq + 5.;
  779 
  780       if (dose[i] <= 0.0) {
  781         out_err(MAT, ERR_FILE, ERR_LINE, _("Dose %i <= 0!\n"), i);
  782         out_end();
  783         return;
  784       }
  785 
  786       logx[np] = log10(dose[i]);
  787       out_r(_("dose=%6g  num=%g effect=%2f prop=%4.2f probit=%5.2f\n"),
  788              dose[i], num[i], effect[i], (effect[i]/num[i]), aprobit[np]);
  789       np++;
  790     }
  791     if(show_errors){
  792       if((np + nerr + 1) % 12 == 0)
  793     mywait();
  794     } else
  795       if((np + 1) % 12 == 0)
  796     mywait();
  797   }
  798 
  799 
  800   if (np < 2) {
  801     out_err(MAT, ERR_FILE, ERR_LINE,
  802     _("Less than 2 valid dose-effect pairs!") );
  803     out_end();
  804     return;
  805   }
  806 
  807 
  808 /* Test for equal probabilities of effects and equal doses, respectively */
  809   equal_num = TRUE;
  810   equal_dose = TRUE;
  811   i=0;
  812   while(infinity[i]) {
  813     i++;
  814   }
  815   old=i;
  816   for (ind=0; ind<np; ind++) {
  817     while(infinity[i]) {
  818       i++;
  819     }
  820     if ( (ind>0) && ((effect[i]/num[i])!=(effect[old]/num[old])) ) {
  821       equal_num = FALSE;
  822     }
  823     if ( (ind>0) && (dose[i] != dose[old]) ) {
  824       equal_dose = FALSE;
  825     }
  826     old=i;
  827     i++;
  828   }
  829 
  830   if (equal_num) {
  831     out_err(MAT, ERR_FILE, ERR_LINE, _("All effect probabilities are equal!"));
  832     out_end();
  833     return;
  834   }
  835 
  836   if (equal_dose) {
  837     out_err(MAT, ERR_FILE, ERR_LINE, _("All doses are equal!") );
  838     out_end();
  839     return;
  840   }
  841 
  842 
  843   mywait();
  844 
  845   x_mean = get_mean(logx,np);
  846   y_mean = get_mean(aprobit,np);
  847   x_sdv = get_sdv(logx,np);
  848   y_sdv = get_sdv(aprobit,np);
  849   s_xy = (get_xysum(logx,aprobit,np) - ((get_sum(logx,np)*get_sum(aprobit,np))/
  850           (REAL) np))/(REAL) (np-1);
  851   r_xy = s_xy/(x_sdv*y_sdv);
  852 /*  rr = r_xy * r_xy; */
  853   a1 = r_xy * (y_sdv/x_sdv);
  854   a0 = y_mean - a1*x_mean;
  855   DIV(arg,  SQR(r_xy), (1.-SQR(r_xy)));
  856   SQRT(tau, ((np-2.)*arg));
  857 
  858   chisq = 0.;
  859   nw_sum = 0.0;
  860   nwx_sum = 0.0;
  861   nwxsqr_sum = 0.0;
  862   offset = 0;
  863 
  864   for (i=0; i<np; i++) {
  865     if (infinity[i]) {
  866       offset++;
  867     }
  868     ind = i+offset;
  869     ycalc = a0 + a1*logx[i];
  870     neq = ycalc-5;
  871     if (neq > 0.) {
  872       positiv = TRUE;
  873       neq = neq * (-1.);
  874     }
  875     else {
  876       positiv = FALSE;
  877     }
  878 
  879     propcalc = get_norm_int(neq);
  880     if (positiv) {
  881       propcalc = 1. - propcalc;
  882     }
  883     w = SQR(get_norm_ord(ycalc-5.))/(propcalc*(1.-propcalc));
  884 /*    printf("%g  %g  %g\n", logx[i], ycalc-5, w);    */
  885     nw_sum += num[ind]*w;
  886     nwx_sum += num[ind]*w*logx[i];
  887     nwxsqr_sum += num[ind]*w*SQR(logx[i]);
  888     oc = SQR(effect[ind] - (num[ind]*propcalc))/
  889       (num[ind]*propcalc*(1.-propcalc));
  890     chisq += oc;
  891     /*
  892     out_r("i=%i ind=%i num=%f effect=%f y=%f P=%f w=%f oc=%f logx=%g\n",
  893           i, ind, num[ind], effect[ind], ycalc, propcalc, w, oc, logx[i]);
  894     */
  895   }
  896 
  897 
  898   /*
  899   out_r("np = %i\n", np);
  900   out_r("nwxsqr_sum = %g\n", nwxsqr_sum);
  901   out_r("nwx_sum    = %g\n", nwx_sum);
  902   out_r("nw_sum     = %g\n", nw_sum);
  903   */
  904 
  905   nwxsqr_sum = nwxsqr_sum - ( SQR(nwx_sum)/nw_sum );
  906   x_mean = nwx_sum/nw_sum;
  907   a0 = y_mean - a1*x_mean;
  908   ed50 = (5.0-a0)/a1;
  909   mq_m = 1./SQR(a1) * (1./nw_sum + SQR(ed50-x_mean)/nwxsqr_sum);
  910   s_m = sqrt(mq_m);
  911   conf_l = ed50-(1.96*s_m);
  912   conf_u = ed50+(1.96*s_m);
  913   ed90 = (6.28-a0)/a1;
  914 
  915   /*
  916   out_r("nwxsqr = %g\n", nwxsqr_sum);
  917   out_r("x_mean = %g\n", x_mean);
  918   out_r("mq_m   = %g\n", mq_m);
  919   out_r("s_m    = %g\n", s_m);
  920   */
  921 
  922   colorize(ClHeader);
  923   out_r(_("Result probit analysis:\n") );
  924   colorize(ClDefault);
  925   if (a1<0.0) {
  926     out_err(MWA, ERR_FILE, ERR_LINE,
  927     _("Inverse probit curve due to negative slope a1!!!") );
  928   }
  929   out_r("ED50 = %g\n", pow(10., ed50));
  930   out_r(_("Confidence range (95%%) for ED50: [%g, %g]\n"),
  931          pow(10., conf_l), pow(10., conf_u));
  932 /*
  933   out_r("nw_sum=%f nwx_sum=%f x_mean=%f s_m=%f\n",
  934          nw_sum, nwx_sum, x_mean, s_m);
  935 */
  936   out_r("ED90 = %g\n", pow(10., ed90));
  937   out_r("a0   = %g\n", a0);
  938   out_r("a1   = %g\n", a1);
  939   out_r( _("Chi^2= %g\n"), chisq);
  940   out_r(_("Degrees of freedom = %i\n"),(np-2));
  941   out_r(_("Correlation coefficient r = %f\n"), r_xy);
  942   out_r(_("Check value Tau = %f\n"), tau);
  943   if (tau>0.0) {
  944     out_r(_("\nt-Test with check value Tau:\n") );
  945     out_r(_("Hypothesis H0: Correlation according to probit-model exists\n") );
  946     alpha = get_t_int(tau, (np-2));
  947     out_r(_("Probability of H0: %f\n"), alpha);
  948   }
  949   else {
  950     out_r(_("t-Test with Tau not possible since Tau = 0\n") );
  951   }
  952   out_r(_("\nChi^2-test:\n") );
  953   out_r(_("Hypothesis H0: r=0  vs. H1: r#0\n") );
  954   alpha = get_chi_int(chisq, (np-2));
  955   out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
  956 
  957   if (!noplot) {
  958     dose0 = pow(10., ((2.1-a0)/a1)); /* ~ 0% Probability */
  959     dose1 = pow(10., ((8.5-a0)/a1)); /* ~ 100 % Probability */
  960 #ifndef STATIST_X
  961     out_r( _("doselab=|%s|, effectlab=|%s|\n"),
  962     get_name(dose), get_name(effect));
  963 #endif
  964     plot_probit(dose, num, effect, n, a0, a1, dose0, dose1,
  965         get_label(dose), get_label(effect));
  966   }
  967   out_end();
  968 
  969 }
  970 
  971 /* =================================================================== */
  972 
  973 
  974 void kolmo_test(REAL x[], int n) {
  975   REAL  mean, sdv;
  976   int   i, k, alpha;
  977   REAL  d, norm0, *cdf, *z, diff, fn1 = 0, fn2 = 0, z0 = 0;
  978 
  979   mean = get_mean(x,n);
  980   sdv = get_sdv(x,n);
  981 
  982 
  983   z =   (REAL*)m_calloc(n, sizeof(REAL));
  984   cdf = (REAL*)m_calloc(n, sizeof(REAL));
  985   for (i=0; i<n; i++) {
  986     DIV(z[i], (x[i]-mean), sdv);
  987     cdf[i] = (REAL)(i+1)/(REAL)n;
  988   }
  989 
  990   qsort(z, n, sizeof(REAL), real_compar_up);
  991 
  992   d = 0.0;
  993   for (i=0; i<n; i++) {
  994     norm0 = get_norm_int(z[i]);
  995 
  996     k = i+1;
  997     do {
  998       k--;
  999       diff = fabs(cdf[k]-norm0);
 1000 /*      printf("z=%g cdf=%g  norm=%g  d=%g\n", z[k], cdf[k], norm0, diff);  */
 1001       if (diff > d) {
 1002     d = diff;
 1003     fn1 = cdf[k];
 1004     fn2 = norm0;
 1005     z0 = z[i];
 1006       }
 1007     }
 1008     while (equal(z[k], z[i]));
 1009   }
 1010 
 1011   if (!noplot) {
 1012     plot_cdf_ks(z, n, z0, fn1, fn2, x, get_label(x));
 1013   }
 1014 
 1015   alpha = pks(d, n);
 1016 
 1017   /* prob=pks(sqrt( SQR((REAL)n) / ((REAL)(2*n)))*(d));    */
 1018 
 1019   out_start();
 1020   out_r(_("Hypothesis H0: Data is normally distributed versus\n") );
 1021   out_r(_("Hypothesis H1: Data is not normally distributed\n\n") );
 1022 
 1023   colorize(ClHeader);
 1024   out_r(_("Result KS-Lilliefors-Test on normal distribution:\n") );
 1025   colorize(ClDefault);
 1026   out_r(_("D (calculated)= %f\n"), d);
 1027   out_r(_("Number of data = %i\n"), n);
 1028 
 1029   out_r(_("Mean = %g\n"), mean);
 1030   out_r(_("Standard deviation = %g\n"), sdv);
 1031 
 1032   if (alpha > 0) {
 1033     out_r(_("H0 accepted with a significance level of %i%%\n"), alpha);
 1034   }
 1035   else {
 1036     out_r(_("H0 rejected\n") );
 1037   }
 1038   out_end();
 1039 }
 1040 
 1041 void percentiles(REAL x[], int n) {
 1042   REAL  mean, sdv;
 1043   int   i, index;
 1044   REAL  p, *z, alpha = 0;
 1045 
 1046   mean = get_mean(x,n);
 1047   sdv = get_sdv(x,n);
 1048 
 1049 
 1050   z =   (REAL*)m_calloc(n, sizeof(REAL));
 1051   for (i=0; i<n; i++) {
 1052     z[i] = x[i];
 1053   }
 1054 
 1055   qsort(z, n, sizeof(REAL), real_compar_up);
 1056 
 1057   if (!noplot) {
 1058     plot_cdf(z, n, get_label(x));
 1059   }
 1060 
 1061   out_start();
 1062   out_r(_("Percentiles for column \"%s\"\n"), get_label(x));
 1063   alpha = 0;
 1064   for(i = 0; i < 9; i++) {
 1065     alpha += 0.1;
 1066     index = (int)(ceil((REAL)n * alpha));
 1067     p = z[index-1];
 1068     out_r("%i%%\t%g\n", (int)(alpha*100.5), p);
 1069   }
 1070   alpha = 0.95;
 1071   index = (int)(ceil((REAL)n * alpha));
 1072   p = z[index-1];
 1073   out_r("%d%%\t%g\n", (int)(alpha*100.0), p);
 1074   alpha = 1.0;
 1075   index = (int)(ceil((REAL)n * alpha));
 1076   p = z[index-1];
 1077   out_r("%d%%\t%g\n\n", (int)(alpha*100.0), p);
 1078 
 1079   out_end();
 1080 }
 1081 
 1082 
 1083 /* ===================================================================
 1084 
 1085 void compare_dist(REAL x[], int nx, REAL y[], int ny) {
 1086   REAL d, prob, alpha;
 1087 
 1088   ks(x, nx, y, ny, &d, &prob);
 1089 
 1090   out_start();
 1091   out_r("Hypothese H0: x und y sind gleich verteilt gegen  \n");
 1092   out_r("Hypothese H1: x und y sind nicht gleich verteilt  \n\n");
 1093 
 1094   out_r("Ergebnis Kolmogoroff-Smirnoff-Test auf Normalverteilung:\n");
 1095   out_r("D (berechnet)= %f\n", d);
 1096   out_r("Anzahl der x-Werte = %i\n", nx);
 1097   out_r("Anzahl der y-Werte = %i\n", ny);
 1098 
 1099   if (prob == 0.0) {
 1100     alpha = 0.0;
 1101   }
 1102   else {
 1103     alpha = 1.0 - prob;
 1104   }
 1105   out_r("Irrtumswahrscheinlichkeit alpha fuer H0 = %f\n\n", alpha);
 1106   out_end();
 1107 }
 1108 
 1109   =================================================================== */
 1110 
 1111 
 1112 void equal_freq(REAL x[], int n) {
 1113   int *y, class[MCLASS], freq[MCLASS];
 1114   int nclass, i, k, df;
 1115   REAL chisq, phi, alpha, chi;
 1116   BOOLEAN found;
 1117 
 1118   nclass = 0;
 1119   y = (int*) m_calloc(n, sizeof(int));
 1120   for (i=0; i<n; i++) {
 1121     y[i] = get_round(x[i]);
 1122   }
 1123 
 1124   for (i=0; i<n; i++) {
 1125     found = FALSE;
 1126     for (k=0; k<nclass; k++) {
 1127       if (y[i]==class[k]) {
 1128     found = TRUE;
 1129     freq[k] ++;
 1130     break;
 1131       }
 1132     }
 1133     if (!found) {
 1134       class[nclass] = y[i];
 1135       freq[nclass] = 1;
 1136       nclass ++;
 1137     }
 1138     if(nclass > MCLASS){
 1139       out_err(MAT, ERR_FILE, ERR_LINE, 
 1140       _("Test aborted: There are more than %i classes of frequency."),
 1141       MCLASS);
 1142       return;
 1143     }
 1144   }
 1145 
 1146   out_start();
 1147   for (k=0; k<nclass; k++) {
 1148     if (freq[k]<=5) {
 1149       out_r(_("Warning: This test shouldn't be applied,\n"
 1150         "\tsince there are frequencies <= 5!\n\n") );
 1151       break;
 1152     }
 1153   }
 1154 
 1155 
 1156   chisq = 0;
 1157   phi = (REAL)n/(REAL)nclass;
 1158 
 1159   if ( (nclass==2) && (n<200) ) {
 1160     out_r(_("Correction according to YATES applied, since just 2 classes and n<200\n\n") );
 1161     if (n<25) {
 1162       out_r(_("Warning: FISCHER-Test shouldn't be applied,\n\tsince number of values <25\n\n") );
 1163     }
 1164     for (k=0; k<nclass; k++) {
 1165       /* chisq += SQR(fabs(freq[k]-phi)-0.5)/phi; */
 1166       DIV(chi, SQR(fabs(freq[k]-phi)-0.5), phi);
 1167       chisq += chi;
 1168     }
 1169   }
 1170   else {
 1171     for (k=0; k<nclass; k++) {
 1172       /* chisq += SQR( (REAL)freq[k] - phi)/phi; */
 1173       DIV(chi, SQR( (REAL)freq[k] - phi), phi);
 1174       chisq += chi;
 1175     }
 1176   }
 1177 
 1178   df = nclass -1;
 1179 
 1180   colorize(ClHeader);
 1181   out_r(_("Result Chi^2-Test of equal frequency:\n") );
 1182   colorize(ClDefault);
 1183   out_r(_("Hypothesis H0: Values have equal frequency\n") );
 1184   out_r(_("Hypothesis H1: Values don't have equal frequencies\n\n") );
 1185 
 1186   if (df >= 1) {
 1187     out_r( _("Chi^2 = %f\n"), chisq);
 1188     out_r(_("Degrees of freedom = %i\n"), df);
 1189     alpha = 1. - get_chi_int(chisq, df);
 1190     out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
 1191   }
 1192   else {
 1193     out_r(_("Chi^2-Test of normal distribution not possible since degrees of freedom < 1!\n\n") );
 1194   }
 1195   out_end();
 1196 }
 1197 
 1198 /* ========================================================================== */
 1199 
 1200 
 1201 void compare_freq(REAL x[], int nx, REAL y[], int ny) {
 1202   typedef struct {
 1203     int class, x, y;
 1204   } FREQUENCIES;
 1205 
 1206   int *ix, *iy;
 1207   int nclass, i, k, df;
 1208   REAL chisq, phi, alpha, chi;
 1209   BOOLEAN found;
 1210   FREQUENCIES freq[MCLASS];
 1211 
 1212   nclass = 0;
 1213   for (i=0; i<MCLASS; i++) {
 1214     freq[i].x = 0;
 1215     freq[i].y = 0;
 1216   }
 1217 
 1218   ix = (int*) m_calloc(nx, sizeof(int));
 1219   for (i=0; i<nx; i++) {
 1220     ix[i] = get_round(x[i]);
 1221   }
 1222 
 1223   for (i=0; i<nx; i++) {
 1224     found = FALSE;
 1225     for (k=0; k<nclass; k++) {
 1226       if (ix[i]==freq[k].class) {
 1227     found = TRUE;
 1228     freq[k].x ++;
 1229     break;
 1230       }
 1231     }
 1232     if (!found) {
 1233       freq[nclass].class = ix[i];
 1234       freq[nclass].x = 1;
 1235       nclass ++;
 1236     }
 1237   }
 1238 
 1239 
 1240   iy = (int*) m_calloc(ny, sizeof(int));
 1241   for (i=0; i<ny; i++) {
 1242     iy[i] = get_round(y[i]);
 1243   }
 1244 
 1245   for (i=0; i<ny; i++) {
 1246     found = FALSE;
 1247     for (k=0; k<nclass; k++) {
 1248       if (iy[i]==freq[k].class) {
 1249     found = TRUE;
 1250     freq[k].y ++;
 1251     break;
 1252       }
 1253     }
 1254     if (!found) {
 1255       freq[nclass].class = iy[i];
 1256       freq[nclass].y = 1;
 1257       nclass ++;
 1258     }
 1259     if(nclass > MCLASS){
 1260       out_err(MAT, ERR_FILE, ERR_LINE, 
 1261       _("Test aborted: There are more than %i classes of frequency."),
 1262       MCLASS);
 1263       return;
 1264     }
 1265   }
 1266 
 1267   out_start();
 1268   for (k=0; k<nclass; k++) {
 1269     if (freq[k].x<=5) {
 1270       out_r(_("Warning: This test shouldn't be applied,\n"
 1271         "\tsince there are frequencies <= 5!\n\n") );
 1272       break;
 1273     }
 1274   }
 1275 
 1276   chisq = 0;
 1277 
 1278   if ( (nclass==2) && (nx<200) ) {
 1279     out_r(_("Correction according to YATES applied, since just 2 classes and n<200\n\n") );
 1280     if (nx<25) {
 1281       out_r(_("Warning: FISCHER-Test shouldn't be applied,\n\tsince number of values <25\n\n") );
 1282     }
 1283     for (k=0; k<nclass; k++) {
 1284       phi = ((REAL)freq[k].y/(REAL)ny)*(REAL)nx;
 1285       /* chisq += SQR(fabs(freq[k].x-phi)-0.5)/phi; */
 1286       DIV(chi, SQR(fabs(freq[k].x-phi)-0.5), phi);
 1287       chisq += chi;
 1288     }
 1289   }
 1290   else {
 1291     for (k=0; k<nclass; k++) {
 1292       phi = ((REAL)freq[k].y/(REAL)ny)*(REAL)nx;
 1293       /* chisq += SQR( (REAL)freq[k].x - phi)/phi; */
 1294       DIV(chi, SQR( (REAL)freq[k].x - phi), phi);
 1295       chisq += chi;
 1296     }
 1297   }
 1298 
 1299   df = nclass -1;
 1300 
 1301   colorize(ClHeader);
 1302   out_r(_("Result Chi^2-Test of equal frequency:\n") );
 1303   colorize(ClDefault);
 1304   out_r(_("Hypothesis H0: x and y are equally distributed\n") );
 1305   out_r(_("Hypothesis H1: x and y are not equally distributed\n") );
 1306 
 1307   if (df >= 1) {
 1308     out_r( _("Chi^2 = %f\n"), chisq);
 1309     out_r(_("Degrees of freedom: %i\n"), df);
 1310     alpha = 1. - get_chi_int(chisq, df);
 1311     out_r(_("Probability of H0: %6.4f\n\n"), 1.0-alpha);
 1312   }
 1313   else {
 1314     out_r(_("Chi^2-Test of normal distribution not possible since degrees of freedom < 1!\n\n") );
 1315   }
 1316   out_end();
 1317 }
 1318 
 1319 /* ========================================================================== */
 1320 
 1321 
 1322 
 1323 void lin_reg(REAL x[], REAL y[], int n) {
 1324   REAL x_mean, x_sdv, y_mean, y_sdv;
 1325   REAL s_xy, r_xy, a1, a0, rr, t, alpha;
 1326   /*  REAL sse, qysum; */
 1327   int  df;
 1328 
 1329   x_mean = get_mean(x,n);
 1330   y_mean = get_mean(y,n);
 1331   x_sdv = get_sdv(x,n);
 1332   y_sdv = get_sdv(y,n);
 1333   s_xy = (get_xysum(x,y,n) - ((get_sum(x,n)*get_sum(y,n))/(REAL) n))/
 1334          (REAL) (n-1);
 1335   DIV(r_xy, s_xy, x_sdv*y_sdv);
 1336   rr = r_xy * r_xy;
 1337   a1 = r_xy * (y_sdv/x_sdv);
 1338   a0 = y_mean - a1*x_mean;
 1339 
 1340   df = n - 2;
 1341   /* z = r_xy * sqrt((REAL)n-1.); */
 1342 /*
 1343   sse = 0.0;
 1344   qysum = 0.0;
 1345   for (i=0; i<n; i++) {
 1346     sse += SQR(y[i] - (a0 + a1*x[i]));
 1347     qysum += SQR(y[i] - y_mean);
 1348   }
 1349 */
 1350   out_start();
 1351   colorize(ClHeader);
 1352   out_r(_("\nResults of linear regression:\n") );
 1353   colorize(ClDefault);
 1354   out_r(_("number of data points n   : %i \n"),n);
 1355   out_r(_("Intersection a0           : %g \n"),a0);
 1356   out_r(_("Slope a1                  : %g \n"),a1);
 1357   out_r(_("Correlation coefficient r : %g \n"),r_xy);
 1358   out_r(_("Coefficient of determination r^2      : %g \n"),rr);
 1359   out_r(_("Degr. of freedom df = n-2 : %i \n"), df);
 1360 /*  out_r("qysum=%f6.4, sse=%6.4f, r^2=%6.4f\n", qysum, sse, 1-sse/qysum); */
 1361   if (fabs(r_xy) < 0.999999999) {
 1362     t = r_xy * sqrt(((REAL)n-2.)/(1.-SQR(r_xy)));
 1363     out_r(_("Estimated t-value         : %f\n"), t);
 1364     alpha = get_t_int(fabs(t), df);
 1365     out_r(_("\nt-Test:\n") );
 1366     out_r(_("Hypothesis H0: r = 0  against hypothesis H1: r1 # 0  (->two-sided)\n") );
 1367     /* a = alpha;  */
 1368     out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
 1369   } else{
 1370     out_r(_("t-Test not possible since |r| = 1!\n") );
 1371   }
 1372 
 1373   if (!noplot) {
 1374     plot_pair(x, y, n, a0, a1, get_label(x), get_label(y));
 1375   }
 1376 
 1377   out_r("\n");
 1378   out_end();
 1379 }
 1380 
 1381 
 1382 
 1383 /* =================================================================== */
 1384 
 1385 void point_biserial_reg(REAL x[], REAL y[], int n) {
 1386   REAL m_y0, m_y1, ysdv, *y0, *y1;
 1387   REAL r_pb, rr, t, alpha;
 1388   int *category;
 1389   int i, n0=0, n1=0, df;
 1390 
 1391   y0 = (REAL*)m_calloc(n, sizeof(REAL));
 1392   y1 = (REAL*)m_calloc(n, sizeof(REAL));
 1393   category = (int*)m_calloc(n, sizeof(int));
 1394 
 1395   for (i=0; i<n; i++) {
 1396     category[i] = get_round(x[i]);
 1397     if ((category[i] != TRUE) && (category[i] != FALSE)) {
 1398       out_err(ERR, ERR_FILE, ERR_LINE,
 1399       _("First column must contain only 0's and 1's!"));
 1400       return;
 1401     }
 1402     if (category[i]) {
 1403       y1[n1] = y[i];
 1404       n1++;
 1405     }
 1406     else {
 1407       y0[n0] = y[i];
 1408       n0++;
 1409     }
 1410   }
 1411 
 1412   m_y0 = get_mean(y0, n0);
 1413   m_y1 = get_mean(y1, n1);
 1414 
 1415   ysdv = get_sdv(y, n);
 1416   DIV(r_pb, (m_y1-m_y0), ysdv);
 1417   r_pb *= sqrt((REAL)(n1*n0)/(n*(n-1)));
 1418   /* r_pb = (m_y1-m_y0)/ysdv*sqrt((REAL)(n1*n0)/(n*(n-1)));  */
 1419 
 1420   rr = r_pb*r_pb;
 1421   df = n - 2;
 1422 
 1423   out_start();
 1424   colorize(ClHeader);
 1425   out_r(_("\nResult point biserial correlation:\n") );
 1426   colorize(ClDefault);
 1427   out_r(_("Number of data points n  : %i \n"), n);
 1428   out_r(_("Correlation coefficient r: %20.12e \n"), r_pb);
 1429   out_r(_("Coefficient of determination r^2     : %20.12e \n"), rr);
 1430   out_r(_("Degrees of freedom df = n-2 : %i \n"), df);
 1431 
 1432   if (fabs(r_pb) < 1.) {
 1433      t = r_pb * sqrt(((REAL)n-2.)/(1.-SQR(r_pb)));
 1434      /* f = SQR(r_pb)/(1.-SQR(r_pb)) * (REAL)(n-2); */
 1435     out_r(_("Calculated t-value      : %f \n"), t);
 1436     alpha = get_t_int(fabs(t), df);
 1437     out_r(_("\nt-Test:\n") );
 1438     out_r(_("Hypothesis H0: r = 0  versus hypothesis H1: r1 # 0  (->two-sided)\n") );
 1439     /* a = alpha; */
 1440     out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
 1441   } else{
 1442     out_r(_("t-Test not possible since |r| = 1!\n") );
 1443   }
 1444 
 1445   out_r("\n");
 1446   out_end();
 1447 }
 1448 
 1449 
 1450 /* =================================================================== */
 1451 
 1452 
 1453 void histogram(REAL x[], int n, int mclass, REAL min, REAL max) {
 1454             /* calculates the classes for a histrogram
 1455                         calls: plot_hist() or plot_histo_discrete() with it*/
 1456 
 1457   int   aclass[MCLASS];   /* counter for the values in class [i]    */
 1458   REAL  classval[MCLASS], fac, fac_short,arg, step;
 1459   BOOLEAN zeroclass = FALSE;  /* true, if a class remains empty     */
 1460   BOOLEAN autoclass;      /* whether autoclass is selected or not   */
 1461   BOOLEAN hit_max_mark;   /* TRUE, when a point hits the max mark   */
 1462   BOOLEAN discrete=FALSE; /* if we have only points, hitting the marks  */
 1463   int   lower=0,higher=0; /*counts, how many values are not counted in aclass*/
 1464   int interval;       /* width of one class             */
 1465   int i,ii;       /* loop variables             */
 1466 
 1467   autoclass=(mclass==0);    /* check for autoclass          */
 1468 
 1469   if (autoclass) {                  /* autoclass was choosen*/
 1470                 /* start with a good approximation
 1471                 of the number of classes. Adjust that,
 1472                 as long, as we get empty classes    */
 1473     mclass = get_round(1. + 3.32*log10((REAL)n));
 1474   }
 1475 
 1476 
 1477   do {
 1478     for (i=0; i<mclass; i++) {/* clear class counters           */
 1479       aclass[i] = 0;
 1480     }
 1481     lower=higher=0;     /* reset counters           */
 1482     hit_max_mark=FALSE;     /* resetting                */
 1483 
 1484     fac = (max-min)/(REAL)(mclass);             /* size of each intervall*/
 1485     for (i=0; i<n; i++) {
 1486       if(x[i]<min) { lower++; continue; }     /* count lower values   */
 1487 
 1488       arg = (x[i]-min)/fac;
 1489       interval = (int) arg;
 1490       /* see if the value lower or equal to max
 1491      inc. aclass[intervall] or higher accordingly*/
 1492       if(interval<mclass)
 1493       {
 1494     aclass[interval]++;
 1495       }else
 1496       {                         /* We have to include the right border for
 1497                                 cases with pure integers                */
 1498     if(x[i]<=max)
 1499     {
 1500       aclass[mclass-1]++;
 1501                 /* if a value hits the last point, there is
 1502                                 a chance that we are discrete           */
 1503       hit_max_mark=TRUE;
 1504     }
 1505     else
 1506       higher++;
 1507       }
 1508     }
 1509                 /* if we are in automatic class selection
 1510                                 check for for classes, which are empty
 1511                                 with the current number of classes  */
 1512     if(autoclass)
 1513     {
 1514       zeroclass = FALSE;
 1515       for (i=0; i<mclass; i++) {
 1516     if (aclass[i] == 0) {
 1517       zeroclass = TRUE;
 1518       mclass --;
 1519       break;
 1520 
 1521     }
 1522       }
 1523     }
 1524   } while (autoclass && zeroclass);
 1525 
 1526                 /* test if we are discrete, when a point has
 1527                 hit the max mark with the last mclass number*/
 1528   if(hit_max_mark)
 1529   {
 1530     REAL help;
 1531     discrete=TRUE;
 1532     fac_short=(max-min)/(REAL)(mclass-1);
 1533 
 1534     for(ii=0; ii<n;ii++)
 1535     {   help=(x[ii]-min)/fac_short;
 1536       if( help!=(int)(help))
 1537       { discrete=FALSE; break; }
 1538     }
 1539   }
 1540 
 1541                     /* we have aclass[] properly filled*/
 1542   out_start();
 1543   if(discrete)
 1544   {
 1545     out_d(_("Discrete data with exactly %d classes!\n"), mclass);
 1546     step = (max-min)/(REAL)(mclass-1);
 1547     for (i=0; i<mclass; i++) {
 1548       classval[i] = min + (step*(REAL)i);
 1549     }
 1550   } else
 1551   {
 1552     step = (max-min)/(REAL)(mclass);
 1553     for (i=0; i<mclass; i++) {
 1554       classval[i] = min + (step/2.) + (step*(REAL)i);
 1555     }
 1556   }
 1557 
 1558   if ((!thist) && (!noplot)){
 1559     if(discrete)
 1560       plot_histo_discrete(classval, aclass, mclass, step, x, get_label(x));
 1561     else
 1562       plot_histo(classval, aclass, mclass, step, x, get_label(x));
 1563   }
 1564   else {
 1565     if( (!bernhard)|| ( bernhard && thist ))
 1566     {
 1567       out_r(_("\nHistogram (class center - number of values)\n") );
 1568       print_histo(classval, aclass, mclass);
 1569     }
 1570   }
 1571   out_end();
 1572 } /* end of histogram() */
 1573 
 1574 
 1575 
 1576 void standard(REAL x[], int n, int mclass, REAL min, REAL max) {
 1577   REAL mean, sdv, sdv_mean, var_coef, median;
 1578   REAL *xh, t, conf;
 1579   REAL q_l, q_u, index, no_u, no_l;
 1580   int i, i_l, i_u;
 1581 
 1582   int   xcrit;
 1583 
 1584 /* critical values for 95% confidence intervals of the Medians (from NEAVE):*/
 1585   int critical[50] = {
 1586     -1, -1, -1, -1, -1, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3,
 1587     3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11,
 1588     11, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17, 17
 1589   };
 1590   REAL med_conf_lo, med_conf_up;
 1591 
 1592 /*  if (mclass==0) {
 1593     min = get_min(x,n);
 1594     max = get_max(x,n);
 1595   }*/
 1596 
 1597 
 1598   out_start();
 1599   if (max==min) {
 1600     if(!bernhard) {
 1601       out_err(MAT, ERR_FILE, ERR_LINE, _("All values are equal!") );
 1602     } else
 1603     {
 1604       colorize(ClHeader);
 1605       out_r(_("#Result general statistical information in a table\n") );
 1606       colorize(ClDefault);
 1607       out_r(_("#All values are equal!\n") );
 1608       out_r("#n\tmean\tm-conf\tm+conf\tmedian\tme_c_lo\tme_c_up"
 1609       "\tquar_lo\tquar_up\tsdv\tvarc(%%)\tsdv_err\tmin\tmax\n");
 1610       out_r("%i\t",  n);
 1611       out_r("%g\t%g\t%g\t", min, min, min);
 1612       out_r("%g\t%g\t%g\t", min, min, min);
 1613       out_r("%g\t", min);
 1614       out_r("%g\t", min);
 1615       out_r("%g\t", 0);
 1616       out_r("%f\t", 0);
 1617       out_r("%g\t", 0);
 1618       out_r("%g\t", get_min(x,n));
 1619       out_r("%g\t\n", get_max(x,n));
 1620 
 1621     }
 1622 
 1623     out_end();
 1624     return;
 1625   }
 1626   if (min > max) {
 1627     out_err(MAT, ERR_FILE, ERR_LINE,
 1628     _("Minimum is larger than maximum!") );
 1629     out_end();
 1630     return;
 1631   }
 1632 
 1633   mean = get_mean(x,n);
 1634   sdv = get_sdv(x,n);
 1635   sdv_mean = sdv/sqrt((REAL)n);
 1636   var_coef = (sdv/mean) * 100.;
 1637   t = get_t(0.975, (n-1));
 1638   conf = (t*sdv)/sqrt((REAL)n);
 1639 
 1640 
 1641   histogram(x,n,mclass,min,max);    /* calc and plot the histogram  */
 1642 
 1643 
 1644   xh = (REAL*)m_calloc(n, sizeof(REAL));
 1645 
 1646   for (i=0; i<n; i++) {
 1647     xh[i] = x[i];
 1648   }
 1649 
 1650   qsort(xh, n, sizeof(REAL), real_compar_up);
 1651 
 1652   if (n % 2 == 1) {
 1653     i = (n-1)/2;
 1654     median = xh[i];
 1655   }
 1656   else {
 1657     i = (n/2) -1;
 1658     median = xh[i];
 1659     i++;
 1660     median = (median + xh[i])/2.;
 1661   }
 1662 
 1663   index = (REAL)n*0.25;
 1664   if (index==floor(index)) {
 1665     i_l = (int)index-1;
 1666     i_u = (int)index;
 1667   }
 1668   else {
 1669     i_l = (int) floor(index-1.);
 1670     i_u = (int) ceil(index-1.);
 1671   }
 1672 
 1673   q_l = 0.5 * (xh[i_l]+xh[i_u]);
 1674   q_u = 0.5 * (xh[n-i_l-1]+xh[n-i_u-1]);
 1675   no_u = median + 1.58*(q_u-q_l)/sqrt((REAL)n);
 1676   no_l = median - 1.58*(q_u-q_l)/sqrt((REAL)n);
 1677 
 1678   if (n<6) {
 1679     med_conf_lo = med_conf_up = 0.0;
 1680   }
 1681   else if (n<=50) {
 1682     med_conf_lo = xh[critical[n-1]];
 1683     med_conf_up = xh[n-critical[n-1]-1];
 1684   }
 1685   else {
 1686     xcrit = (int)floor(-(0.5*sqrt((REAL)n)) *1.96-0.5+0.5*(REAL)n);
 1687     med_conf_lo = xh[xcrit];
 1688     med_conf_up = xh[n-xcrit-1];
 1689   }
 1690     /*
 1691        printf("crit(50)=%i\n", (int)floor(-(0.5*sqrt(50.0)) *1.96-0.5+0.5*50.0));
 1692        */
 1693   if(!bernhard)
 1694   {
 1695     colorize(ClHeader);
 1696     out_r(_("\nResult general statistical information:\n") );
 1697     colorize(ClDefault);
 1698     out_r(_("Count                          : %i\n"),  n);
 1699     out_r(_("Mean                           : %g  [%g, %g] (95%%)\n"),
 1700     mean, mean-conf, mean+conf);
 1701     out_r(_("Median                         : %g  [%g, %g] (95%%)\n"),
 1702     median, med_conf_lo, med_conf_up);
 1703   /*  out_r("                               :     [%g, %g] (90%%)\n",
 1704       no_l, no_u);
 1705       */
 1706     out_r(_("25%% quartile                   : %g\n"), q_l);
 1707     out_r(_("75%% quartile                   : %g\n"), q_u);
 1708     out_r(_("Standard deviation             : %g\n"), sdv);
 1709     out_r(_("Variation coefficient          : %f %%\n"), var_coef);
 1710     out_r(_("Standard error of mean         : %g\n"), sdv_mean);
 1711     out_r(_("Minimum                        : %g\n"), get_min(x,n));
 1712     out_r(_("Maximum                        : %g\n\n"), get_max(x,n));
 1713   }else
 1714   {
 1715     colorize(ClHeader);
 1716     out_r(_("#Result general statistical information in a table\n") );
 1717     colorize(ClDefault);
 1718     out_r("#n\tmean\tm-conf\tm+conf\tmedian\tme_c_lo\tme_c_up"
 1719     "\tquar_lo\tquar_up\tsdv\tvarc(%%)\tsdv_err\tmin\tmax\n");
 1720     out_r("%i\t",  n);
 1721     out_r("%g\t%g\t%g\t", mean, mean-conf, mean+conf);
 1722     out_r("%g\t%g\t%g\t", median, med_conf_lo, med_conf_up);
 1723     out_r("%g\t", q_l);
 1724     out_r("%g\t", q_u);
 1725     out_r("%g\t", sdv);
 1726     out_r("%f\t", var_coef);
 1727     out_r("%g\t", sdv_mean);
 1728     out_r("%g\t", get_min(x,n));
 1729     out_r("%g\t\n", get_max(x,n));
 1730 
 1731   }
 1732   out_end();
 1733 
 1734 }
 1735 
 1736 /* =================================================================== */
 1737 
 1738 
 1739 void rank_order(REAL x[], REAL y[], int n) {
 1740   REAL  rho, z, alpha, a;
 1741   int   i, df;
 1742 /*  BOOLEAN infinity = TRUE; */
 1743 
 1744 /* critical raw-Values for significance levels 1%, 2%, 5% and 10% for n <=30
 1745    indexes (two-tailed), is valid only for n >= 5! (from: SACHS, p. 230)
 1746   */
 1747 
 1748   const float sig[31][4] = {
 1749          {0.0000,0.0000,0.0000,0.0000}, {0.0000,0.0000,0.0000,0.0000},
 1750      {0.0000,0.0000,0.0000,0.0000}, {0.0000,0.0000,0.0000,0.0000},
 1751          {0.0000,0.0000,0.0000,0.0000}, {0.9001,0.9000,0.9000,0.8000},
 1752      {0.9429,0.8857,0.8286,0.7714}, {0.8929,0.8571,0.7450,0.6786},
 1753          {0.8571,0.8095,0.6905,0.5952}, {0.8167,0.7667,0.6833,0.5833},
 1754      {0.7818,0.7333,0.6364,0.5515}, {0.7454,0.7000,0.6091,0.5273},
 1755      {0.7273,0.6713,0.5804,0.4965}, {0.6978,0.6429,0.5549,0.4780},
 1756      {0.6474,0.6220,0.5341,0.4593}, {0.6536,0.6000,0.5179,0.4429},
 1757      {0.6324,0.5824,0.5000,0.4265}, {0.6152,0.5637,0.4853,0.4118},
 1758      {0.5975,0.5480,0.3994,0.3994}, {0.5825,0.5333,0.4579,0.3895},
 1759      {0.5684,0.5203,0.4451,0.3789}, {0.5545,0.5078,0.4351,0.3688},
 1760      {0.5426,0.4963,0.4241,0.3597}, {0.5306,0.4852,0.4150,0.3518},
 1761      {0.5200,0.4748,0.4061,0.3435}, {0.5100,0.4654,0.3977,0.3362},
 1762      {0.5002,0.4564,0.3894,0.3299}, {0.4915,0.4481,0.3822,0.3236},
 1763      {0.4828,0.4401,0.3749,0.3175}, {0.4744,0.4320,0.3685,0.3113},
 1764      {0.4665,0.4251,0.3620,0.3059}
 1765        };
 1766 
 1767 
 1768   if ((rho=get_rank_correlation(x, y, n))==REAL_MIN) {
 1769     return;
 1770   }
 1771 
 1772   out_start();
 1773   df = n-2;
 1774   colorize(ClHeader);
 1775   out_r(_("\nResult SPEARMAN's Rank-Correlation:\n") );
 1776   colorize(ClDefault);
 1777   out_r(_("rho = %f\n"), rho);
 1778   out_r(_("Degrees of freedom = n-2 = %i\n\n"), df);
 1779 
 1780   out_r(_("Hypothesis H0: rho=0 versus hypothesis H1: rho#0 (->two-sided)\n"));
 1781 
 1782   if (n<5) {
 1783     out_r(_("Test not possible since n<5 (too few values!)\n\n") );
 1784     out_end();
 1785     return;
 1786   }
 1787   else if ((n>=5) && (n<=30)) {
 1788     for (i=0; i<4; i++) {
 1789       if (fabs(rho) > (REAL)sig[n][i]) {
 1790     break;
 1791       }
 1792     }
 1793     switch (i) {
 1794     case 0: alpha = 0.01;
 1795       break;
 1796     case 1: alpha = 0.02;
 1797       break;
 1798     case 2: alpha = 0.05;
 1799       break;
 1800     case 3: alpha = 0.10;
 1801       break;
 1802     default: alpha = 1.0;
 1803       break;
 1804     }
 1805 
 1806     if (alpha < 1.0) {
 1807       out_r(_("H0 rejected at a level of significance of %4.2f\n\n"),
 1808         alpha);
 1809     }
 1810     else {
 1811       out_r(_("Alpha > 0.1 ==> H0 can not be rejected\n\n") );
 1812     }
 1813   }
 1814   else {
 1815 /*
 1816     a = get_norm_int(2.326);
 1817     alpha =  1.- ((1.-a)*2.0);
 1818     out_d("norm(2.326)=%f\n", alpha);
 1819 */
 1820     z = fabs(rho) * sqrt((REAL)n-1.0);
 1821     out_r(_("Significance checked using the normal distribution\n") );
 1822     out_d("z = %f\n", z);
 1823     a = get_norm_int(z);
 1824     alpha =  1.- ((1.-a)*2.0);
 1825     out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-alpha);
 1826   }
 1827   out_end();
 1828 }
 1829 
 1830 /* =================================================================== */
 1831 
 1832 void t_test(REAL x[], int nx, REAL y[], int ny) {
 1833   REAL denom, t, x_mean, y_mean;
 1834   REAL x_sum, y_sum, x_qsum, y_qsum, alpha, a;
 1835   REAL x_qmean, y_qmean;
 1836   int  df;
 1837 /* t-Test for difference between two means of two samples */
 1838 
 1839   x_sum = get_sum(x,nx);
 1840   y_sum = get_sum(y,ny);
 1841   x_qsum = get_qsum(x,nx);
 1842   y_qsum = get_qsum(y,ny);
 1843 
 1844   x_mean = x_sum/(REAL)nx;
 1845   y_mean = y_sum/(REAL)ny;
 1846 
 1847 
 1848   DIV(x_qmean, SQR(x_sum), (REAL)nx);
 1849   DIV(y_qmean,  SQR(y_sum), (REAL)ny);
 1850   DIV(denom, (x_qsum - x_qmean + y_qsum - y_qmean), (REAL)(nx + ny - 2));
 1851   denom *= (1./nx + 1./ny);
 1852   DIV(t, fabs(x_mean - y_mean), sqrt(denom));
 1853   df = nx + ny -2;
 1854 
 1855   out_start();
 1856   colorize(ClHeader);
 1857   out_r(_("\nResult t-Test for independent samples\n") );
 1858   colorize(ClDefault);
 1859   out_r(_("Degrees of freedom = n1 + n2 - 2 = %i\n"), df);
 1860   if (t !=0 ) {
 1861     alpha = get_t_int(fabs(t), df);
 1862     out_r("t = %f\n", t);
 1863     out_r(_("\nHypothesis H0: mu1=mu2 versus hypothesis H1: mu1#mu2 (->two-sided)\n") );
 1864     a = alpha;
 1865     out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-a);
 1866   }
 1867   else {
 1868     out_r(_("t-Test not possible since t = 0!\n") );
 1869   }
 1870 
 1871 /*
 1872   out_r("Hypothese H0: mu1=mu2  gegen  Hypothese H1: mu1>mu2 (->einseitig)\n");
 1873   sgn = get_sgn(t);
 1874   a = 1.- 0.5*(1. + alpha*sgn);
 1875   out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
 1876 
 1877   out_r("Hypothese H0: mu1=mu2  gegen  Hypothese H1: mu1<mu2 (->einseitig)\n");
 1878   a = 1.- 0.5*(1. - alpha*sgn);
 1879   out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
 1880 */
 1881   out_end();
 1882 }
 1883 
 1884 /* ====================================================================== */
 1885 
 1886 
 1887 
 1888 void pair_t_test(REAL x[], REAL y[], int n) {
 1889   REAL t, alpha, a, dif_sdv, dif_mean, *dif;
 1890   int  df, i;
 1891 /* t-Test for difference between two means of two paired samples */
 1892 
 1893   dif = (REAL*)m_calloc(n, sizeof(REAL));
 1894   for (i=0; i<n; i++) {
 1895     dif[i] = x[i]-y[i];
 1896   }
 1897   dif_sdv = get_sdv(dif, n);
 1898   dif_mean = get_mean(dif, n);
 1899   DIV(t, dif_mean*sqrt((REAL)n), sqrt(SQR(dif_sdv)));
 1900   t = fabs(t);
 1901   /*  t = fabs( (dif_mean*sqrt((REAL)n))/(sqrt(SQR(dif_sdv))) ); */
 1902   df = n-1;
 1903 
 1904   out_start();
 1905   colorize(ClHeader);
 1906   out_r(_("\nResult t-Test for pairwise ordered samples\n") );
 1907   colorize(ClDefault);
 1908   out_r(_("Degrees of freedom n-1 = %i\n"), df);
 1909   if (t !=0 ) {
 1910     alpha = get_t_int(fabs(t), df);
 1911     out_r("t = %f\n", t);
 1912     out_r(_("\nHypothesis H0: mu1=mu2 versus hypothesis H1: mu1#mu2 (->two-sided)\n") );
 1913     a = alpha;
 1914     out_r(_("Probability of H0 = %6.4f\n\n"), 1.0-a);
 1915   }
 1916   else {
 1917     out_r(_("t-Test not possible since t = 0!\n") );
 1918   }
 1919 
 1920 /*
 1921   out_r("Hypothese H0: mu1=mu2  gegen  Hypothese H1: mu1>mu2 (->einseitig)\n");
 1922   sgn = get_sgn(t);
 1923   a = 1.- 0.5*(1. + alpha*sgn);
 1924   out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
 1925 
 1926   out_r("Hypothese H0: mu1=mu2  gegen  Hypothese H1: mu1<mu2 (->einseitig)\n");
 1927   a = 1.- 0.5*(1. - alpha*sgn);
 1928   out_r("Wahrscheinlichkeit fuer H0 = %6.4f\n\n", 1.0-a);
 1929 */
 1930   out_end();
 1931 }
 1932 
 1933 /* ====================================================================== */
 1934 
 1935 
 1936 
 1937 void u_test(REAL x[], int nx, REAL y[], int ny) {
 1938 
 1939   SORTREC *vrec;
 1940   int  i, k, j, n;
 1941   REAL m, ux, uy, rx, ry, umin, z, alpha, t, denom, arg;
 1942 
 1943   n = nx+ny;
 1944 
 1945   vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
 1946 
 1947   for (i=0; i<nx; i++) {
 1948     vrec[i].val = x[i];
 1949     vrec[i].ind = 0;
 1950   }
 1951 
 1952   for (i=nx; i<n; i++) {
 1953     vrec[i].val = y[i-nx];
 1954     vrec[i].ind = 1;
 1955   }
 1956 
 1957   qsort(vrec, n, sizeof(SORTREC), u_rank_compar);
 1958 
 1959   i = 0;
 1960   k = 0;
 1961   m = 0.;
 1962   t = 0.;
 1963   while (i<n) {
 1964     vrec[i].rank = (REAL)i+1.;
 1965     if ( (i<(n-1)) && (equal(vrec[i].val, vrec[i+1].val)) ) {
 1966       k++;
 1967       m += (REAL)i;
 1968     }
 1969     else {
 1970       if (k!=0) {
 1971         m += (REAL)i;
 1972         k ++;
 1973         /* t += (pow((REAL)k, 3.) - (REAL)k)/12.; */
 1974         t +=  ( (REAL)k * (SQR((REAL)k) -1.) ) /12.;
 1975         m = m/ (REAL)(k);
 1976         for (j=i; j>(i-k); j--) {
 1977           vrec[j].rank = m+1.;
 1978         }
 1979       }
 1980       k=0;
 1981       m=0.;
 1982     }
 1983     i++;
 1984   }
 1985 
 1986   rx = 0.0;
 1987   ry = 0.0;
 1988 
 1989   for (i=0; i<n; i++) {
 1990     if (vrec[i].ind == 0) {
 1991       rx += vrec[i].rank;
 1992     }
 1993     else {
 1994       ry += vrec[i].rank;
 1995     }
 1996 /*
 1997     out_r("Nr %i  ind %i  Wert %f  Rank %f\n",
 1998            i, vrec[i].ind, vrec[i].val, vrec[i].rank);
 1999 */
 2000   }
 2001 
 2002   ux = (REAL)nx*(REAL)ny + ( (REAL)nx*(REAL)(nx+1) )/2. - rx;
 2003   uy = (REAL)nx*(REAL)ny + ( (REAL)ny*(REAL)(ny+1) )/2. - ry;
 2004 
 2005   umin = (ux<uy) ? ux : uy;
 2006 /*  z = fabs(umin-((REAL)nx*(REAL)ny)/2.) /
 2007     sqrt( ( (REAL)nx*(REAL)ny*( (REAL)nx+(REAL)ny+1.) )/12.);
 2008 */
 2009   arg = (REAL)nx*(REAL)ny/((REAL)n*(REAL)(n-1))*((REAL)n*(SQR((REAL)n)-1.)/12. -t);
 2010   SQRT(denom, arg);
 2011   DIV(z, fabs(umin-((REAL)nx*(REAL)ny)/2.), denom);
 2012 /*
 2013   z = fabs(umin-((REAL)nx*(REAL)ny)/2.) /
 2014     sqrt( ( (REAL)nx*(REAL)ny/((REAL)n*(REAL)(n-1)) *
 2015            ( (REAL)n * (SQR((REAL)n)-1.)/12. -t ) )  );
 2016 */
 2017   out_start();
 2018   colorize(ClHeader);
 2019   out_r(_("\nResult u-Test:\n") );
 2020   colorize(ClDefault);
 2021   out_r("Rx = %f   Ry = %f\n", rx, ry);
 2022   out_r("Ux = %f   Uy = %f\n", ux, uy);
 2023   out_r("nx = %i   ny = %i\n", nx, ny);
 2024   out_r("U = %f\n", umin);
 2025 
 2026   out_r(_("\nHypothesis H0: x and y originate from the same set versus\n") );
 2027   /* Test on x > y if ux < uy. */
 2028   if( ux < uy ) {
 2029     out_r(_("Hypothesis H1: x stochastically larger than y (-> one-sided test) or\n") );
 2030 
 2031   } else { /* ux >= uy */
 2032     out_r(_("Hypothesis H1: x stochastically smaller than y (-> one-sided test) or\n") );
 2033   }
 2034   out_r(_("              x is different from y (-> two-sided test)\n\n") );
 2035 
 2036 
 2037 /*  if ( ((nx>=8) && (ny>=8)) || (nx>20) || (ny>20) ) {  */
 2038   if ( ((nx<7) || (ny<7)) || (nx<=19) || (ny<=19) ) {
 2039     out_r(_("Warning: Only rough approximation of probability possible!\n") );
 2040     out_r(_("Please check exact probability of alpha for h having %i "
 2041       "degrees of freedom\n"), ncol-1);
 2042     out_r(_("in the literature, e.g. in Table 16/17, pp. 599 in WEBER \n\n") );
 2043   }
 2044 
 2045   out_r(_("Normally distributed random variable   z = %f\n"), z);
 2046   out_r(_("Correction term for equal ranks t = %f\n"), t);
 2047   alpha = get_norm_int(z);
 2048   out_r(_("Probability of H0 (one-sided) = %6.4f\n"), 1.0-alpha);
 2049   alpha = 1.- ((1.-alpha)*2.);
 2050   out_r(_("Probability of H0 (two-sided) = %6.4f\n\n"), 1.0-alpha);
 2051   out_end();
 2052 }
 2053 
 2054 
 2055 /* ====================================================================== */
 2056 
 2057 
 2058 void kruskal_test(PREAL x[], int nrow[], int ncol) {
 2059   SORTREC *vrec;
 2060   unsigned int n=0, ir, ic, k=0, i, j;
 2061   REAL *r, nr, t, m, cor, prob, h=0.;
 2062 
 2063   for (ic=0; ic<ncol; ic++) {
 2064     n += nrow[ic];
 2065   }
 2066 
 2067   vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
 2068   r = (REAL*)m_calloc(ncol, sizeof(REAL));
 2069 
 2070   for (ic=0; ic<ncol; ic++) {
 2071     r[ic] = 0.;
 2072     for (ir=0; ir<nrow[ic]; ir++) {
 2073       vrec[k].val = x[ic][ir];
 2074       vrec[k].ind = ic;
 2075       k++;
 2076     }
 2077   }
 2078   out_start();
 2079 #ifdef DEBUG
 2080   if (n != k) {
 2081     out_r(_("Error: n!=k\n") );
 2082   }
 2083 #endif
 2084 
 2085   qsort(vrec, n, sizeof(SORTREC), u_rank_compar);
 2086 
 2087   i = 0;
 2088   k = 0;
 2089   m = 0.;
 2090   t = 0.;
 2091   while (i<n) {
 2092     vrec[i].rank = (REAL)i+1.;
 2093     if ( (i<(n-1)) && (equal(vrec[i].val, vrec[i+1].val)) ) {
 2094       k++;
 2095       m += (REAL)i;
 2096     }
 2097     else {
 2098       if (k!=0) {
 2099         m += (REAL)i;
 2100         k ++;
 2101         t += (REAL)k * (SQR((REAL)k)-1.);
 2102         m = m/ (REAL)(k);
 2103         for (j=i; j>(i-k); j--) {
 2104           vrec[j].rank = m+1.;
 2105         }
 2106       }
 2107       k=0;
 2108       m=0.;
 2109     }
 2110     i++;
 2111   }
 2112 
 2113   for (i=0; i<n; i++) {
 2114     r[vrec[i].ind] += vrec[i].rank;
 2115   }
 2116 
 2117 
 2118   nr = (REAL)n;
 2119   cor = 1. - t/(SQR(nr)*(nr-1.));
 2120 
 2121   for (ic=0; ic<ncol; ic++) {
 2122     h += (SQR(r[ic])/(REAL)nrow[ic]);
 2123     /* out_r("r[%u] = %g\n", ic, r[ic]);  */
 2124   }
 2125 
 2126   h = (12./(nr*(nr+1.)) * h - 3.*(nr+1.))/cor;
 2127 
 2128   colorize(ClHeader);
 2129   out_r(_("\nResult Kruskal-Wallis-Test:\n") );
 2130   colorize(ClDefault);
 2131   out_r(_("Chi^2-distributed random variable H = %g\n"), h);
 2132   out_r(_("Correction term for equal ranks (ties) = %g\n"), cor);
 2133   out_r(_("Degrees of freedom = %i\n"), ncol-1);
 2134 
 2135   out_r(_("\nHypothesis H0: Samples originate from the same set versus\n") );
 2136   out_r(_("Hypothesis H1: Samples do not originate from the same set\n") );
 2137 
 2138  /* if ((ncol>=3) && (nrow[0]>=5) && (nrow[1]>=5) && nrow[2]>=5) {  */
 2139   if ((ncol<2) || (nrow[0]<4) || (nrow[1]<4) || nrow[2]<4) {
 2140     out_r(_("Warning: Only rough approximation of probability possible!\n") );
 2141     out_r(_("Please check exact probability of alpha for h having %i degrees "
 2142       "of freedom\n"), ncol-1);
 2143     out_r(_("in the literature, e.g. in Table 16/17, pp. 599 in WEBER \n\n") );
 2144   }
 2145   if (h > 0.0) {
 2146     prob = get_chi_int(h, (ncol-1));
 2147     out_r(_("Probability alpha for H0 = %6.4f\n\n"), prob);
 2148   }
 2149   else {
 2150     out_err(MAT, ERR_FILE, ERR_LINE,
 2151         _("Calculation of Chi^2-distribution not possible since h<0!") );
 2152   }
 2153   out_end();
 2154 }
 2155 
 2156 
 2157 /* ====================================================================== */
 2158 
 2159 void vierfeld_test(REAL x[], REAL y[], int n) {
 2160    const REAL  pi=3.14159265358979323846;
 2161    long unsigned int a=0, b=0, c=0, d=0, i, xi, yi;
 2162    REAL chi, r, T, prob, alpha, delta, beta, gamma;
 2163 
 2164   out_start();
 2165    if (n!=2) {
 2166      out_r(_("Characteristics are counted (1='existent', 0='not existent')\n\n"));
 2167      for (i=0; i<n; i++) {
 2168        xi = get_round(x[i]);
 2169        yi = get_round(y[i]);
 2170        if ( ((xi!=1) && (xi!=0)) || ((yi!=1) && (yi!=0)) ) {
 2171          out_err(ERR, ERR_FILE, ERR_LINE,
 2172          _("Columns must contain only 0's and 1's!") );
 2173        out_end();
 2174          return;
 2175        }
 2176        if ( xi &&  yi) { a++; }
 2177        if (!xi &&  yi) { b++; }
 2178        if ( xi && !yi) { c++; }
 2179        if (!xi && !yi) { d++; }
 2180      }
 2181    }
 2182    else {
 2183      out_r(_("Values are being interpreted as fourfold-table:\n\n") );
 2184      a = (unsigned int) x[0];
 2185      b = (unsigned int) y[0];
 2186      c = (unsigned int) x[1];
 2187      d = (unsigned int) y[1];
 2188      n = a+b+c+d;
 2189    }
 2190 
 2191    out_r( _("Fourfold-table:\n\n") );
 2192    out_r( _(" |                   |  A existent   |  A not existent     |\n"));
 2193    out_r(" +-------------------+---------------+---------------------+\n");
 2194    out_r( _(" | B existent        |      a        |         b           |\n"));
 2195    out_r( _(" | B not existent    |      c        |         d           |\n"));
 2196    out_r(" +-------------------+---------------+---------------------+\n\n");
 2197 
 2198    alpha = ((REAL)(a+b) * (REAL)(a+c))/(REAL)n;
 2199    delta = ((REAL)(d+b) * (REAL)(d+c))/(REAL)n;
 2200    beta  = ((REAL)(a+b) * (REAL)(b+d))/(REAL)n;
 2201    gamma = ((REAL)(c+d) * (REAL)(a+c))/(REAL)n;
 2202 
 2203    if ((alpha>=5.) && (delta>=5.) && (beta>=5.) && (gamma>=5.)) {
 2204      chi = (SQR( (REAL)a*(REAL)d - (REAL)b*(REAL)c  )  *(REAL)n) /
 2205        ( (REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d) );
 2206    }
 2207    else {
 2208     out_r(_("Using according to YATES corrected Chi^2 value, since frequencies <= 5 expected!\n\n") );
 2209 
 2210     chi = (SQR(fabs( (REAL)a*(REAL)d - (REAL)b*(REAL)c) -0.5*(REAL)n)*(REAL)n)/
 2211       ((REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d));
 2212   }
 2213   T = (((REAL)a*(REAL)d - (REAL)b*(REAL)c))/
 2214     (sqrt( (REAL)(a+b) * (REAL)(c+d) * (REAL)(a+c) * (REAL)(b+d) ));
 2215   r = sin(T*(pi/4.));
 2216 
 2217   out_r(_("observed: a=%lu,  b=%lu,  c=%lu,  d=%lu,  n=%i\n"),
 2218         a, b, c, d, n);
 2219   out_r(_("expected: a=%4.2f,  b=%4.2f,  c=%4.2f,  d=%4.2f,  n=%i\n"),
 2220          alpha, beta, gamma, delta, n);
 2221   out_r( _("Chi^2 = %f\n"), chi);
 2222   out_r(_("Contingency coefficient r (according to HAMMING) = %f\n\n"), r);
 2223   out_r(_("Chi^2-test:\n") );
 2224   out_r(_("Hypothesis H0: Both items are independent (uncorrelated)\n") );
 2225   out_r(_("versus H1: Both items are dependent (correlated)\n") );
 2226   prob = get_chi_int(chi, 1);
 2227   out_r(_("Probability of H0: %6.4f\n\n"), prob);
 2228   out_end();
 2229 }
 2230 
 2231 
 2232 /* ====================================================================== */
 2233 
 2234 
 2235 void tafel_test(PREAL xx[], int nrow, int ncol) {
 2236   REAL fieldsum=0.0, sum=0.0, *colsum, *rowsum, lncols=0.0, lnrows=0.0,
 2237        prob, g, fsum, chi=0.0;
 2238   int i, k, df, max_err = 100, zero_cols=0, zero_rows=0;
 2239   PREAL *e;
 2240   BOOLEAN freq_too_small=FALSE, printall=TRUE;
 2241 
 2242   colsum = (REAL*)m_calloc(ncol, sizeof(REAL));
 2243   rowsum = (REAL*)m_calloc(nrow, sizeof(REAL));
 2244   e = (PREAL*) m_calloc(ncol, sizeof(PREAL));
 2245   for (i=0; i<ncol; i++) {
 2246     e[i] = (REAL*)m_calloc(nrow, sizeof(REAL));
 2247   }
 2248 
 2249   out_start();
 2250   for (i=0; i<ncol; i++) {
 2251     colsum[i] = 0.0;
 2252     for (k=0; k<nrow; k++) {
 2253       if(xx[i][k] < 0.0){
 2254     out_err(MAT, ERR_FILE, ERR_LINE,
 2255         _("Column \"%s\", line %i, has value < 0"),
 2256         get_name(xx[i]), k);
 2257     return;
 2258       }
 2259       fieldsum += xx[i][k] * get_ln_0(xx[i][k]);
 2260       sum += xx[i][k];
 2261       colsum[i] += xx[i][k];
 2262     }
 2263     if (colsum[i] == 0.0) {
 2264       zero_cols ++;
 2265     }
 2266     out_r("\n");
 2267   }
 2268 
 2269   for (k=0; k<nrow; k++) {
 2270     rowsum[k] = 0.0;
 2271     for (i=0; i<ncol; i++) {
 2272       rowsum[k] += xx[i][k];
 2273     }
 2274     if (rowsum[k] == 0.0) {
 2275       zero_rows ++;
 2276     }
 2277   }
 2278 
 2279   for (k=0; k<nrow; k++) {
 2280     for (i=0; i<ncol; i++) {
 2281       e[i][k] =(colsum[i]*rowsum[k])/sum;
 2282       if (e[i][k] != 0.0) {
 2283         chi += SQR(xx[i][k]-e[i][k])/e[i][k];
 2284       }
 2285       if (e[i][k] < 5.) {
 2286         freq_too_small = TRUE;
 2287       }
 2288     }
 2289   }
 2290 
 2291 
 2292   out_r(_("Analysis of two-items table:\n\n") );
 2293 
 2294   for (i=0; i<ncol; i++) {
 2295     if(colsum[i] < 0.0){
 2296       out_err(MAT, ERR_FILE, ERR_LINE,
 2297       _("Sum of values of column \"%s\" is lower than 0"),
 2298       get_name(xx[i]));
 2299       return;
 2300     }
 2301     lncols += colsum[i] * get_ln_0(colsum[i]);
 2302   }
 2303 
 2304   for (k=0; k<nrow; k++) {
 2305     if(rowsum[k] < 0.0){
 2306       out_err(MAT, ERR_FILE, ERR_LINE,
 2307       _("Sum of values of row %i is lower than 0"), k);
 2308       return;
 2309     }
 2310     lnrows += rowsum[k] * get_ln_0(rowsum[k]);
 2311   }
 2312 
 2313   if(sum < 0.0){
 2314     out_err(MAT, ERR_FILE, ERR_LINE,
 2315     _("Sum of all values of all columns is lower than 0"));
 2316     return;
 2317   }
 2318   fsum = sum * get_ln_0(sum);
 2319   g = 2 * (fieldsum-lncols-lnrows+fsum);
 2320   df = (ncol-1-zero_cols) * (nrow-1-zero_rows);
 2321 
 2322 /*
 2323   out_r("Transformierte Zeilensummen = %f\n", lnrows);
 2324   out_r("Transformierte Spaltensummen = %f\n", lncols);
 2325   out_r("Transformierte Haeufigkeiten = %f\n", fieldsum);
 2326   out_r("Transformierte Gesamtsumme = %f\n", fsum);
 2327 
 2328 */
 2329 
 2330 
 2331   out_r(  _(" Item A   |                   Item B                 \n") );
 2332   out_r("          |");
 2333   for (i=0; i<ncol; i++) {
 2334     out_r(_("Class%3i  |"), (i+1));
 2335   }
 2336   out_r(_("  Sum     |\n") );
 2337   for (i=-2; i<ncol; i++) {
 2338     out_r("----------+");
 2339   }
 2340   out_r("\n");
 2341 
 2342 
 2343   for (k=0; k<nrow; k++) {
 2344     if(k == max_err){
 2345       printall = FALSE;
 2346       out_i(_("There are %i rows to be printed yet. "
 2347         "Do you want to see them? (%s) "), (nrow - max_err), _("y/N"));
 2348       GETNLINE;
 2349       if(!(empty) && (line[0] ==_("y")[0] || line[0] != _("Y")[0])){
 2350     printall = TRUE;
 2351       }
 2352     }
 2353     if(printall){
 2354       out_r(_("Class%3i  |"), k+1);
 2355       for (i=0; i<ncol; i++) {
 2356     out_r("%7u   |", (unsigned int)xx[i][k]);
 2357       }
 2358       out_r("%7i   |\n", (unsigned int)rowsum[k]);
 2359       out_r(_("exp. frq. |") );
 2360       for (i=0; i<ncol; i++) {
 2361     out_r("%7u   |", (unsigned int)e[i][k]);
 2362       }
 2363       out_r("          |\n");
 2364       if (k<(nrow-1)) {
 2365     for (i=-2; i<ncol; i++) {
 2366       out_r("----------+");
 2367     }
 2368     out_r("\n");
 2369       }
 2370     }
 2371   }
 2372 
 2373   for (i=-2; i<ncol; i++) {
 2374     out_r("----------+");
 2375   }
 2376   out_r(_("\n Sum      |") );
 2377   for (i=0; i<ncol; i++) {
 2378     out_r("%7i   |", (unsigned int)colsum[i]);
 2379   }
 2380   out_r("%7i   |\n", (unsigned int)sum);
 2381 
 2382   out_r( _("\nChi^2                            = %f\n"), chi);
 2383   out_r(_("G (check value for Chi^2-Test) = %f\n"), g);
 2384   out_r(_("Degrees of freedom = %i\n\n"), df);
 2385   if (freq_too_small) {
 2386     out_r(_("Warning: Expected frequencies < 5!\n\n") );
 2387   }
 2388 
 2389   out_r(_("Chi^2-test:\n") );
 2390   out_r(_("Hypothesis H0: Both items are independent (uncorrelated)\n") );
 2391   out_r(_("versus H1: Both items are dependent (correlated)\n") );
 2392   prob = get_chi_int(g, df);
 2393   out_r(_("Probability of H0: %6.4f\n\n"), prob);
 2394   out_end();
 2395 
 2396 }
 2397 
 2398 
 2399 /* ====================================================================== */
 2400 
 2401 void wilcoxon_test(REAL x[], REAL y[], int n) {
 2402   SORTREC *vrec;
 2403   int i, k, j, nv, alpha, navg, t;
 2404   REAL *diff, *avg;
 2405   REAL m, s_plus, s_minus, s_min, d, nr, nd, prob, z, median, conf_u, conf_l;
 2406   const short int sig[20][3] = {
 2407     {  0, -1, -1 }, {  2,  0, -1 }, {  4,  2,  0 }, {  6,  3,  2 },
 2408     {  8,  5,  3 }, { 11,  7,  5 }, { 14, 10,  7 }, { 17, 13, 10 },
 2409     { 21, 16, 13 }, { 25, 20, 16 }, { 30, 24, 20 }, { 35, 28, 23 },
 2410     { 40, 33, 28 }, { 46, 38, 32 }, { 52, 43, 38 }, { 59, 49, 43 },
 2411     { 66, 56, 49 }, { 73, 62, 55 }, { 81, 69, 61 },
 2412     { 89, 77, 68 }
 2413   };
 2414 
 2415 
 2416   diff = (REAL*) m_calloc(n, sizeof(REAL));
 2417   vrec = (SORTREC*)m_calloc(n, sizeof(SORTREC));
 2418 
 2419   nv = 0;
 2420   for (i=0; i<n; i++) {
 2421     d = x[i]-y[i];
 2422     diff[i] = d;
 2423     if (d != 0.0) {
 2424       vrec[nv].val = d;
 2425       nv ++;
 2426     }
 2427   }
 2428 
 2429   if (nv == 0) {
 2430     out_err(MWA, ERR_FILE, ERR_LINE,
 2431         _("All value pairs are equal. WILCOXON-Test thus not possible/has no meaning."));
 2432     return;
 2433   }
 2434 
 2435   qsort(vrec, nv, sizeof(SORTREC), wilcoxon_rank_compar);
 2436 
 2437   i = 0;
 2438   k = 0;
 2439   m = 0.;
 2440   while (i<nv) {
 2441     vrec[i].rank = (REAL)i+1.;
 2442     if ( (i<(nv-1)) && (equal(fabs(vrec[i].val), fabs(vrec[i+1].val))) ) {
 2443       k++;
 2444       m += (REAL)i;
 2445     }
 2446     else {
 2447       if (k!=0) {
 2448         m += (REAL)i;
 2449         k ++;
 2450         m = m / (REAL)(k);
 2451         for (j=i; j>(i-k); j--) {
 2452           vrec[j].rank = m+1.;
 2453         }
 2454       }
 2455       k=0;
 2456       m=0.;
 2457     }
 2458     i++;
 2459   }
 2460 
 2461   s_plus = 0.0;
 2462   s_minus = 0.0;
 2463   for (i=0; i<nv; i++) {
 2464 /*    printf("diff=%20.17f rang=%f\n", vrec[i].val, vrec[i].rank);     */
 2465     if (vrec[i].val > 0.0) {
 2466       s_plus += vrec[i].rank;
 2467     }
 2468     else {
 2469       s_minus += vrec[i].rank;
 2470     }
 2471   }
 2472 
 2473   median = get_median(diff, n);
 2474   navg = n*(n+1)/2;
 2475   avg = (REAL*) m_calloc(navg, sizeof(REAL));
 2476   nr = (REAL)nv;
 2477   nd = (REAL)m;
 2478 
 2479   k = 0;
 2480   for (i=0; i<n; i++) {
 2481     for (j=i; j<n; j++) {
 2482       avg[k] = (diff[i] + diff[j])/2.0;
 2483       k ++;
 2484     }
 2485   }
 2486 
 2487   qsort(avg, navg, sizeof(REAL), real_compar_up);
 2488 /*  nr = 50; */
 2489   if (n<26) {
 2490     t = sig[n-6][2];
 2491   }
 2492   else {
 2493     z = get_z(0.99);
 2494     t = (int) (nd*(nd+1.0)/4.0 - z*sqrt(nd*(nr+1.0)*(2.0*nd+1.0)/24.0) - 0.5);
 2495   }
 2496   if ( (t<0) || (t>=navg) ) {
 2497     t = 0;
 2498   }
 2499 
 2500   conf_l = avg[t];
 2501   conf_u = avg[navg-t-1];
 2502 
 2503 /*
 2504   for (i=0; i<navg; i++) {
 2505     printf("%i %f\n", i, avg[i]);
 2506   }
 2507 
 2508   printf("t= %i, z=%f\n", t, z);
 2509 
 2510   mean = get_mean(diff, nv);
 2511 
 2512 
 2513 
 2514   if (nv % 2 == 1) {
 2515     i = (nv-1)/2;
 2516     median = vrec[i].val;
 2517   }
 2518   else {
 2519     i = (nv/2) -1;
 2520     median = vrec[i].val;
 2521     i++;
 2522     median = (median + vrec[i].val)/2.;
 2523   }
 2524 */
 2525 
 2526   s_min = (s_plus<s_minus) ? s_plus : s_minus;
 2527   out_start();
 2528   colorize(ClHeader);
 2529   out_r(_("\nResult Wilcoxon-Test:\n") );
 2530   colorize(ClDefault);
 2531   out_r(_("Positive rank sum S+  : %g\n"), s_plus);
 2532   out_r(_("Negative rank sum S-  : %g\n"), s_minus);
 2533   out_r(_("Smallest rank sum S   : %g\n"), s_min);
 2534   out_r(_("Number of value pairs : %i\n"), n);
 2535   out_r(_("Size of the set       : %i\n"), nv);
 2536   out_r(_("Number 0-differences  : %i\n"), n-nv);
 2537   out_r(_("Mean of differences   : %g +/- %g\n"), get_mean(diff, n),
 2538     get_sdv(diff, n));
 2539   out_r(_("Median of differences : %f [%f, %f] (99%%)\n\n"),
 2540     median, conf_l, conf_u);
 2541 /*  out_r("Mittel der Differenzen: %f\n\n", mean);  */
 2542 
 2543   out_r(_("Hypothesis H0: x and y are 'treated' equally versus\n") );
 2544   out_r(_("Hypothesis H1: x and y are 'treated' unequally (-> two-sided)\n\n"));
 2545 
 2546   if (nv<6) {
 2547     out_r(_("Calculation of probability not possible if n < 6!\n") );
 2548     out_end();
 2549     return;
 2550   }
 2551   else if (nv<26) {
 2552     i = nv-6;
 2553     out_r(_("Critical values for S (two-sided) from table:\n") );
 2554     out_r("   5%%   2%%   1%%\n");
 2555     out_r(" %4hi %4hi %4hi\n", sig[i][0], sig[i][1], sig[i][2]);
 2556 
 2557     if (s_min <= (REAL)sig[i][2]) {
 2558       alpha = 1;
 2559     }
 2560     else if (s_min <= (REAL)sig[i][1]) {
 2561       alpha = 2;
 2562     }
 2563     else if (s_min <= (REAL)sig[i][0]) {
 2564       alpha = 5;
 2565     }
 2566     else {
 2567       alpha = -1;
 2568     }
 2569 
 2570     if (alpha != -1) {
 2571       out_r(_("H0 rejected at level of significance of %i%% (two-sided)\n\n"),
 2572       alpha);
 2573     }
 2574     else {
 2575       out_r(_("H0 can not be rejected\n\n") );
 2576     }
 2577   }
 2578 /*  else {  */
 2579 
 2580   z = (s_min -  (nr*(nr+1.0))/4.0) /
 2581       sqrt( (nr*(nr+1.0)*(2.0*nr+1.0))/24.0 );
 2582 
 2583   out_r(_("normally distributed variable z = %f\n"), z);
 2584   prob = get_norm_int(z);
 2585   out_r(_("Probability of H0 (one-sided) = %6.4f\n"), prob);
 2586   out_r(_("Probability of H0 (two-sided) = %6.4f\n\n"), prob*2.0);
 2587 /*  }      */
 2588   out_end();
 2589 }
 2590 
 2591 /* ====================================================================== */
 2592 
 2593 
 2594 void outlier(int xx_ind, int n) {
 2595   PREAL x = xx[xx_ind];
 2596   FILE *outfile;
 2597   char analias[80];
 2598   int i, iy, iout, i_l, i_u;
 2599   REAL index, q_l, q_u, w_l, w_u, m_l, m_u, max, min;
 2600   REAL median, mean, no_l, no_u;
 2601   REAL *xs;
 2602 
 2603   if (get_min(x,n)==get_max(x,n)) {
 2604     out_err(MAT, ERR_FILE, ERR_LINE, _("All values are equal!"));
 2605     return;
 2606   }
 2607   xs = (REAL*) m_calloc(n, sizeof(REAL));
 2608   for (i=0; i<n; i++) {
 2609     xs[i] = x[i];
 2610   }
 2611   qsort(xs, n, sizeof(REAL), real_compar_up);
 2612 
 2613   if (n % 2 == 1) {
 2614     i = (n-1)/2;
 2615     median = xs[i];
 2616   }
 2617   else {
 2618     i = (n/2) -1;
 2619     median = xs[i];
 2620     i++;
 2621     median = (median + xs[i])/2.;
 2622   }
 2623 
 2624   mean = get_mean(xs, n);
 2625   max = xs[n-1];
 2626   min = xs[0];
 2627   index = (REAL)n*0.25;
 2628   if (index==floor(index)) {
 2629     i_l = (int)index-1;
 2630     i_u = (int)index;
 2631   }
 2632   else {
 2633     i_l = (int) floor(index-1.);
 2634     i_u = (int) ceil(index-1.);
 2635   }
 2636 
 2637   q_l = 0.5 * (xs[i_l]+xs[i_u]);
 2638   q_u = 0.5 * (xs[n-i_l-1]+xs[n-i_u-1]);
 2639 
 2640   m_l = q_l - 1.5*(q_u-q_l);
 2641   m_u = q_u + 1.5*(q_u-q_l);
 2642 
 2643   w_l = max;
 2644   w_u = min;
 2645 
 2646   for (i=0; i<n; i++) {
 2647     if ((xs[i]>w_u) && (xs[i]<=m_u)) {
 2648       w_u = xs[i];
 2649     }
 2650     if ((xs[i]<w_l) && (xs[i]>=m_l)) {
 2651       w_l = xs[i];
 2652     }
 2653   }
 2654 
 2655   no_u = median + 1.58*(q_u-q_l)/sqrt((REAL)n);
 2656   no_l = median - 1.58*(q_u-q_l)/sqrt((REAL)n);
 2657 
 2658   if (!noplot) {
 2659     plot_box(x, n, median, mean, q_l, q_u, w_l, w_u, no_l, no_u, get_label(x));
 2660   }
 2661 
 2662   out_start();
 2663   iout = 0;
 2664   for (i=0; i<n; i++) {
 2665     if ((x[i]>w_u) || (x[i]<w_l)) {
 2666       iout ++;
 2667       out_r(_("Outliers:  x[%i]=%f\n"), i+1, x[i]);
 2668     }
 2669   }
 2670   out_r(_("\n%i possible outliers found\n\n"), iout);
 2671 
 2672   if (iout == 0) {
 2673     out_end();
 2674     return;
 2675   }
 2676  #ifndef STATIST_X
 2677   out_i(_("Delete outliers? (%s) "), _("y/N") );
 2678   GETNLINE;
 2679   if(!(empty) && (line[0]!=_("y")[0] || line[0] != _("Y")[0])){
 2680       out_end();
 2681       return;
 2682   }
 2683 
 2684   iy = 0;
 2685   strcpy(analias, "out_");
 2686   strncat(analias, alias[acol[0]], 79-strlen(analias));
 2687   make_new_col(analias, nn[xx_ind]);
 2688   outfile = tmpptr[ncol - 1];
 2689   alloc_cols(1, 3);
 2690   x = xx[xx_ind]; /* We redeclare `x' because alloc_cols delete all columns from
 2691              memory */
 2692   for (i=0; i<nn[xx_ind]; i++) {
 2693     if (!((x[i]>w_u) || (x[i]<w_l))) {
 2694      iy  ++;
 2695      FWRITE(&x[i], sizeof(REAL), 1, outfile);
 2696     }
 2697     else {
 2698      iy  ++;
 2699      FWRITE(&SYSMIS, sizeof(REAL), 1, outfile);
 2700     }
 2701   }
 2702 
 2703   out_r(_("%i possible outliers deleted\n"), iout);
 2704                 /* ncol is the number, but counting starts at 0
 2705                 so we have to substract one the the right one*/
 2706   out_r(_("Created new column %s having %i values!\n\n"), alias[ncol-1], iy);
 2707 #endif // ndef STATIST_X
 2708   out_end();
 2709 }
 2710 
 2711 /* =================================================================== */
 2712 
 2713 char *center(char result[80], char *str, int width){
 2714   int i, j, l;
 2715   width += strlen(str) - stringLen(str);
 2716   for(i = 0; i < 80; i++)
 2717     result[i] = 0;
 2718   l = strlen(str);
 2719   i = (width - l) / 2;
 2720   for(j = 0; j < i; j++)
 2721     result[j] = ' ';
 2722   for(i = 0; i < l; i++){
 2723     result[j] = str[i];
 2724     j++;
 2725   }
 2726   while(j < width){
 2727     result[j] = ' ';
 2728     j++;
 2729   }
 2730   return result;
 2731 }
 2732 
 2733 void freq_table(){
 2734   typedef struct { 
 2735     char *c[6]; /* the value, the frequency, and the percentages */
 2736     void *next;
 2737   } FreqRow;
 2738 
 2739   FreqRow *row = NULL, *first_row = NULL;
 2740   char b[20];
 2741   char h[256], l[256];
 2742   PREAL y = xx[acol[0]];
 2743   REAL x, p, vp, ap = 0.0, vap = 0.0; /* value, global %, valid %, 
 2744                      cumulated %, valid cumulated % */
 2745   int i, n, m = 0, w[6], r; /* r = 3 because of the table header */
 2746   BOOLEAN truncated = FALSE;
 2747   qsort(y, nn[acol[0]], sizeof(REAL), real_compar_up);
 2748 
 2749   /* Setting default column widths */
 2750   w[0] = stringLen(_("Value")) + 4;
 2751   w[1] = stringLen(_("N")) + 2;
 2752   w[2] = 8;
 2753   w[3] = stringLen(_("Valid %")) + 2;
 2754   w[4] = stringLen(_("Cum. %")) + 2;
 2755   w[5] = stringLen(_("V. Cum. %")) + 2;
 2756   if(w[3] < 8)
 2757     w[3] = 8;
 2758   if(w[4] < 8)
 2759     w[4] = 8;
 2760   if(w[5] < 8)
 2761     w[5] = 8;
 2762 
 2763   /* Counting frequencies */
 2764   i = 0;
 2765   do{
 2766     x = y[i];
 2767     n = 0;
 2768     while(i < nn[acol[0]] && y[i] == x){
 2769       n++;
 2770       i++;
 2771     }
 2772     if(row == NULL){
 2773       first_row = (FreqRow*)m_calloc(1, sizeof(FreqRow));
 2774       row = first_row;
 2775     } else{
 2776       row->next = (FreqRow*)m_calloc(1, sizeof(FreqRow));
 2777       row = row->next;
 2778     }
 2779     for(r = 2; r < 6; r++)
 2780       row->c[r] = (char*)m_calloc(7, sizeof(char));
 2781     sprintf(b, "%i", n);
 2782     row->c[1] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
 2783     strcpy(row->c[1], b);
 2784     p = 100 * (REAL)n / (REAL)nn[acol[0]];
 2785     sprintf(row->c[2], "%6.2f", p);
 2786     ap += p;
 2787     sprintf(row->c[4], "%6.2f", ap);
 2788     if(x == SYSMIS){
 2789       m = n;
 2790       row->c[0] = (char*)m_calloc((strlen(_("Missing")) + 1), sizeof(char));
 2791       strcpy(row->c[0], _("Missing"));
 2792       strcpy(row->c[3], "--- ");
 2793       strcpy(row->c[5], "--- ");
 2794     } else{
 2795       if(names[acol[0]]){
 2796     for(r = 0; r <  names[acol[0]]->n; r++){
 2797       if(x == names[acol[0]]->v[r]){
 2798         row->c[0] = names[acol[0]]->l[r];
 2799         break;
 2800       }
 2801     }
 2802     if(row->c[0] == NULL){
 2803       sprintf(b, "%6g", x);
 2804       row->c[0] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
 2805       strcpy(row->c[0], b);
 2806     }
 2807       } else{
 2808       sprintf(b, "%6g", x);
 2809       row->c[0] = (char*)m_calloc((strlen(b) + 1), sizeof(char));
 2810       strcpy(row->c[0], b);
 2811       }
 2812       vp = 100 * (REAL)n / (REAL)(nn[acol[0]] - m);
 2813       sprintf(row->c[3], "%6.2f", vp);
 2814       vap += vp;
 2815       sprintf(row->c[5], "%6.2f", vap);
 2816     }
 2817   } while(i < nn[acol[0]]);
 2818   if(first_row == NULL){
 2819     out_err(ERR, ERR_FILE, ERR_LINE, "first_row == NULL!");
 2820     return;
 2821   }
 2822 
 2823   /* Calculating column widths for "value" and "N"*/
 2824   row = first_row;
 2825   while(row){
 2826     for(i = 0; i < 2; i++)
 2827       if((stringLen(row->c[i]) + 2) > w[i])
 2828     w[i] = stringLen(row->c[i]) + 2;
 2829     row = row->next;
 2830   }
 2831 
 2832   /* Printing header */
 2833   memset(l, 0, 256);
 2834   if(names[acol[0]] && names[acol[0]]->ctitle)
 2835     strncpy(l, names[acol[0]]->ctitle, 255);
 2836   else
 2837     strncpy(l, alias[acol[0]], 255);
 2838   colorize(ClHeader);
 2839   out_r(_("Frequencies: %s\n"), l);
 2840   colorize(ClDefault);
 2841   strcpy(b, _("Value"));
 2842   sprintf(h, " %s ", center(l, b, w[0]));
 2843   strcpy(b, _("N"));
 2844   strcat(h, center(l, b, w[1]));
 2845   strcat(h, center(l, "%", w[2]));
 2846   strcpy(b, _("Valid %"));
 2847   strcat(h, center(l, b, w[3]));
 2848   strcpy(b, _("Cum. %"));
 2849   strcat(h, center(l, b, w[4]));
 2850   strcpy(b, _("V. Cum. %"));
 2851   strcat(h, center(l, b, w[5]));
 2852   r = stringLen(h) + 1;
 2853   for(i = 0; i < r; i++)
 2854     l[i] = '=';
 2855   l[i] = 0;
 2856   out_r("%s\n", l);
 2857   colorize(ClHeader);
 2858   out_r("%s\n", h);
 2859   colorize(ClDefault);
 2860   for(i = 0; i < r; i++)
 2861     l[i] = '-';
 2862   l[i] = 0;
 2863   out_r("%s\n", l);
 2864   for(i = 0; i < r; i++)
 2865     l[i] = '=';
 2866   l[i] = 0;
 2867 
 2868   /* Printing rows */
 2869   row = first_row;
 2870   r = 4; /* table title + table header = 4 lines */
 2871   int diff;
 2872   while(row){
 2873     diff = strlen(row->c[0]) - stringLen(row->c[0]);
 2874     if(names[acol[0]])
 2875       sprintf(b, " %%-%is", w[0]+diff);
 2876     else
 2877       sprintf(b, " %%%is", w[0]+diff);
 2878     out_r(b, row->c[0]);
 2879     for(i = 1; i < 6; i++){
 2880       diff = strlen(row->c[i]) - stringLen(row->c[i]);
 2881       sprintf(b, "%%%is", w[i]+diff);
 2882       out_r(b, row->c[i]);
 2883     }
 2884     out_r("\n");
 2885     if ((r+1) % (SCRLINES - 1) == 0){
 2886       mywait();
 2887       if(!empty){
 2888     truncated = TRUE;
 2889     break;
 2890       }
 2891     }
 2892     r++;
 2893     row = row->next;
 2894   }
 2895   
 2896   if(truncated){
 2897     out_r("\n");
 2898     out_err(MWA, ERR_FILE, ERR_LINE,
 2899     _("         ---  TABLE TRUNCATED  ---"));
 2900   } else{
 2901     out_r("%s\n", l);
 2902   }
 2903 }
 2904 
 2905 typedef struct { 
 2906   REAL v; /* the same as xx[0][j] */
 2907   int i;  /* the index: j */
 2908 } Mirror;
 2909 
 2910 int mirror_compare_up(const void *a, const void *b) {
 2911   if (((Mirror*)a)->v > ((Mirror*)b)->v){
 2912     return  1;
 2913   } else{
 2914     if (((Mirror*)a)->v < ((Mirror*)b)->v){
 2915       return -1;
 2916     } else{
 2917       return  0;
 2918     }
 2919   }
 2920 }
 2921 
 2922 int whatwidth_i(int i){
 2923   char s[20];
 2924   sprintf(s, "%i", i);
 2925   return(strlen(s));
 2926 }
 2927 
 2928 int whatwidth_r(REAL r){
 2929   char s[20];
 2930   int i, l;
 2931   if(r == SYSMIS)
 2932     return 1;
 2933   i = floor(r);
 2934   sprintf(s, "%i", i);
 2935   l = strlen(s);
 2936   if(i >= 100)
 2937     return(l + 3);      /*  100.00 */
 2938   else
 2939     if(i <= -100)
 2940       return(l + 4);        /* -100.00 */
 2941     else
 2942       if(i >=0 && i < 100)  /*  1.0000 */
 2943     return(l + 5);
 2944       else          /* -1.0000 */
 2945     return(l + 6);
 2946 }
 2947 
 2948 /* The calculus and the printing of the Table of Means are truncated if there
 2949  * are more than MRESULT. I think that such a table is only meaniful when there
 2950  * is a small number of classes of frequency, and I suppose that the user has
 2951  * chosen this analysis by mistake in these cases */
 2952 void compare_means(int c){
 2953   typedef struct { 
 2954     PREAL r; /* the class of frequency, and the means */
 2955     PREAL s; /* the standard deviation */
 2956     int *n;  /* the frequency, and n. of valid values used to calc. the mean */
 2957     void *next;
 2958   } MeanRow;
 2959 
 2960   int i, j, k, p, n, nc, w;
 2961   int *nw, *rw, *sw; /* lists of widths for columns */ 
 2962   char b[80], *l, *L; 
 2963   MeanRow *row, *firstrow; 
 2964   BOOLEAN truncated = FALSE, vlabelfound = FALSE;
 2965   Mirror *y = (Mirror*)m_calloc(nn[acol[0]], sizeof(Mirror)); 
 2966   REAL *z = (PREAL)m_calloc(nn[acol[0]], sizeof(REAL));
 2967 
 2968   for(i = 0; i < nn[acol[0]]; i++){
 2969     y[i].v = xx[acol[0]][i];
 2970     y[i].i = i;
 2971   }
 2972 
 2973   /* Sort "y" to calculate frequencies, as in freq_table(), but keep track of
 2974    * original indexes to calculate means. */
 2975   qsort(y, nn[acol[0]], sizeof(Mirror), mirror_compare_up);
 2976 
 2977   /* Calculating means, and filling the table of means */
 2978   j = 0;
 2979   k = 0;
 2980   nc = 0;
 2981   row = (MeanRow*)m_calloc(1, sizeof(MeanRow));
 2982   firstrow = row;
 2983   do{
 2984     row->r = (PREAL)m_calloc(c, sizeof(REAL));
 2985     row->s = (PREAL)m_calloc(c, sizeof(REAL));
 2986     row->n = (int*)m_calloc(c, sizeof(int));
 2987     row->next = NULL;
 2988     row->r[0] = y[j].v;
 2989     n = 0;
 2990     i = j;
 2991     while(j < nn[acol[0]] && y[j].v == row->r[0]){ /* Counting frequency */
 2992       n++;
 2993       j++;
 2994     }
 2995     row->n[0] = n;
 2996     for(p = 1; p < c; p++){ /* Filling "z[]" to calculate the mean */
 2997       n = 0;
 2998       for(k = i; k < j; k++){
 2999     if(xx[acol[p]][y[k].i] != SYSMIS){
 3000       z[n] = xx[acol[p]][y[k].i];
 3001       n++;
 3002     }
 3003       }
 3004       if(n > 0)
 3005     row->r[p] = get_mean(z, n);
 3006       else
 3007     row->r[p] = 10.0;           /* This value will not be printed,   */
 3008       if(n > 1)                     /* but will be used to calculate the */
 3009     row->s[p] = get_sdv(z, n);  /* column width: 0.0000 is broader   */
 3010       else              /* than 10.00 */
 3011     row->s[p] = 10.0;
 3012       row->n[p] = n;
 3013     }
 3014     nc++;
 3015     if(nc > MRESULT)
 3016       break;
 3017     if(j < nn[acol[0]]){
 3018       row->next = (MeanRow*)m_calloc(1, sizeof(MeanRow));
 3019       row = row->next;
 3020     }
 3021   } while(j < nn[acol[0]]);
 3022 
 3023   /* Calculating the necessary widths for columns */
 3024   rw = (int*)m_calloc(c, sizeof(int));
 3025   sw = (int*)m_calloc(c, sizeof(int));
 3026   nw = (int*)m_calloc(c, sizeof(int));
 3027   for(p = 0; p < c; p++){
 3028     if(p == 0){
 3029       rw[p] = stringLen(_("value"));
 3030       sw[p] = 0;
 3031     } else{
 3032       rw[p] = stringLen(_("mean"));
 3033       sw[p] = stringLen(_("sdev"));
 3034     }
 3035     nw[p] = 2;
 3036     row = firstrow;
 3037     do{
 3038       i = whatwidth_i(row->n[p]);
 3039       if(i > nw[p])
 3040         nw[p] = i;
 3041       i = whatwidth_r(row->r[p]);
 3042       if(i > rw[p])
 3043     rw[p] = i;
 3044       if(p > 0){
 3045     i = whatwidth_r(row->s[p]);
 3046     if(i > sw[p])
 3047       sw[p] = i;
 3048       }
 3049       if(row->next)
 3050     row = row->next;
 3051     }while(row->next);
 3052   }
 3053   if(names[acol[0]]){
 3054     k = rw[0];
 3055     for(i = 0; i < names[acol[0]]->n; i++)
 3056       if((2 + stringLen(names[acol[0]]->l[i])) > k)
 3057     k = stringLen(names[acol[0]]->l[i]) + 2;
 3058     rw[0] = k;
 3059   }
 3060 
 3061   /* calculating the total width of the table */
 3062   w = 0;
 3063   sw[0] = 0;
 3064   for(p = 0; p < c; p++){
 3065     if(p != 0){
 3066       rw[p] += 2;       /* width += blank space between columns */
 3067       sw[p] += 2;
 3068     }
 3069     nw[p] += 2;
 3070     w += rw[p] + sw[p] + nw[p];
 3071   }
 3072 
 3073   /* Printing the table of means */
 3074   if(names[acol[0]] && names[acol[0]]->ctitle)
 3075     strncpy(b, names[acol[0]]->ctitle, 79);
 3076   else
 3077     strncpy(b, alias[acol[0]], 79);
 3078   colorize(ClHeader);
 3079   out_r(_("Comparison of means: %s\n"), b);
 3080   colorize(ClDefault);
 3081   l = (char*)m_calloc((w+3), sizeof(char));
 3082   L = (char*)m_calloc((w+3), sizeof(char));
 3083   for(i = 0; i <= w; i++){
 3084     l[i] = '-';
 3085     L[i] = '=';
 3086   }
 3087   l[i] = '\n';
 3088   L[i] = '\n';
 3089   out_r(L);
 3090   colorize(ClHeader);
 3091   for(p = 0; p < c; p++){
 3092     i = rw[p] + sw[p] + nw[p];
 3093     out_r("%s", center(b, alias[acol[p]], i));
 3094   }
 3095   colorize(ClDefault);
 3096   out_r("\n");
 3097   for(p = 0; p < c; p++){
 3098     strcpy(b, " ");
 3099     for(i = 0; i < (rw[p] + sw[p] + nw[p] - 1); i++)
 3100       strcat(b, "-");
 3101     out_r(b);
 3102   }
 3103   out_r("\n");
 3104   colorize(ClHeader);
 3105   out_r("%s ", center(b, _("value"), rw[0]));
 3106   out_r("%s", center(b, _("N"), nw[0]));
 3107   for(p = 1; p < c; p++){
 3108     out_r("%s", center(b, _("mean"), rw[p]));
 3109     out_r("%s", center(b, _("sdev"), rw[p]));
 3110     out_r("%s", center(b, _("N"), nw[p]));
 3111   }
 3112   colorize(ClDefault);
 3113   out_r("\n%s", l);
 3114 
 3115   i = 5; /* header = 5 lines */
 3116   row = firstrow;
 3117   int diff;
 3118   while(row){
 3119     for(p = 0; p <  c; p++){
 3120       if(p == 0){
 3121     if(names[acol[0]] && names[acol[0]]->n > 0){  /* print value label */
 3122       if(row->r[p] == SYSMIS){
 3123         sprintf(b, " %%-%is%%%ii", rw[p], nw[p]);
 3124         out_r(b, NODATA, row->n[p]);
 3125       } else{
 3126         for(k = 0; k <  names[acol[0]]->n; k++){
 3127           if(row->r[p] == names[acol[0]]->v[k]){
 3128         diff = strlen(names[acol[0]]->l[k]) - stringLen(names[acol[0]]->l[k]);
 3129         sprintf(b, " %%-%is%%%ii", rw[p]+diff, nw[p]);
 3130         out_r(b, names[acol[0]]->l[k], row->n[p]);
 3131         vlabelfound = TRUE;
 3132         break;
 3133           }
 3134         }
 3135         if(vlabelfound)
 3136           vlabelfound = FALSE;
 3137         else{
 3138           sprintf(b, " %%-%i.%if%%%ii", rw[p], 4, nw[p]);
 3139           out_r(b, row->r[p], row->n[p]);
 3140         }
 3141       }
 3142     } else{ /* no label for values: print value */
 3143       if(row->r[p] == SYSMIS){
 3144         sprintf(b, "%%%is%%%ii", rw[p], nw[p]);
 3145         out_r(b, NODATA, row->n[p]);
 3146       } else{
 3147         sprintf(b, "%%%i.%if%%%ii", rw[p], 4, nw[p]);
 3148         out_r(b, row->r[p], row->n[p]);
 3149       }
 3150     }
 3151       } else{
 3152     if(row->n[p] > 1){
 3153       if((row->r[p] >= 100.0 || row->r[p] <= -100.0) &&
 3154           (row->s[p] >= 100.0 || row->s[p] <= -100.0))
 3155         sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 2, sw[p], 2, nw[p]);
 3156       else if((row->r[p] >= 100.0 || row->r[p] <= -100.0) &&
 3157           (row->s[p] < 100.0 && row->s[p] > -100.0))
 3158         sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 2, sw[p], 4, nw[p]);
 3159       else if((row->r[p] < 100.0 && row->r[p] > -100.0) &&
 3160           (row->s[p] >= 100.0 || row->s[p] <= -100.0))
 3161         sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 4, sw[p], 2, nw[p]);
 3162       else
 3163         sprintf(b, "%%%i.%if%%%i.%if%%%ii", rw[p], 4, sw[p], 4, nw[p]);
 3164       out_r(b, row->r[p], row->s[p], row->n[p]);
 3165     } else{
 3166       if(row->n[p] == 1){
 3167         if(row->r[p] >= 100.0 || row->r[p] <= -100.0) 
 3168           sprintf(b, "%%%i.%if%%%is%%%ii", rw[p], 2, sw[p], nw[p]);
 3169         else
 3170           sprintf(b, "%%%i.%if%%%is%%%ii", rw[p], 4, sw[p], nw[p]);
 3171         out_r(b, row->r[p], "--", row->n[p]);
 3172       } else{
 3173         sprintf(b, "%%%is%%%is%%%ii", rw[p], sw[p], nw[p]);
 3174         out_r(b, "--", "--", 0);
 3175       }
 3176     }
 3177       }
 3178     }
 3179     out_r("\n");
 3180     i++;
 3181     if ((i+1) % (SCRLINES - 1) == 0){
 3182       mywait();
 3183       if(!empty){
 3184     truncated = TRUE;
 3185     break;
 3186       }
 3187     }
 3188     row = row->next;
 3189   }
 3190   if(truncated){
 3191     out_r("\n");
 3192     out_err(MWA, ERR_FILE, ERR_LINE,
 3193     _("         ---  TABLE TRUNCATED  ---"));
 3194   } else
 3195     if(nc > MRESULT){
 3196       out_r("\n");
 3197       out_err(MWA, ERR_FILE, ERR_LINE,
 3198     _("         ---  TABLE TRUNCATED  ---"));
 3199     } else
 3200       out_r("%s\n", L);
 3201 }
 3202 
 3203 /* =================================================================== */
 3204