"Fossies" - the Fresh Open Source Software Archive

Member "statist-1.4.2/tools/run_comparison.sh" (10 Sep 2006, 5182 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Bash source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 #!/bin/bash
    2 
    3 # This file is part of statist
    4 #
    5 # It is distributed under the GNU General Public License.
    6 # See the file COPYING for details.
    7 #
    8 # (C) 2006 Jakson Alves de Aquino <jalvesaq@gmail.com>
    9 #
   10 #  $Id: run_comparison.sh,v 1.3 2006/09/10 02:42:12 jakson Exp $
   11 
   12 # Warning: This script is the worst possible example of how to do statistics.
   13 # The analyzes carried here have the only goal of comparing two different
   14 # versions of statist.
   15 
   16 # This script will run two different versions of statist to check whether the
   17 # new version still produces the correct results. The old version is the one
   18 # tagged as "bernhard-last-maint-2005" (here called $statist_102) and the new
   19 # one is the 1.4.1.  You have to compile these versions of statist and write
   20 # the path to them in the variables below. The path to statist "examples"
   21 # subdirectory is necessary because the file city.csv will be used.
   22 
   23 statist_102="./statist-1.0.2"
   24 statist_141="../src/statist"
   25 examples_dir="../examples"
   26 
   27 LANG=C
   28 LANGUAGE=C
   29 LC_ALL=C
   30 
   31 # Before running statist-1.0.2 we must create four new data files to run
   32 # analysis that require the same number of rows. Statist-1.0.2 has the option
   33 # -delrow, but, alone, it isn't enough.
   34 
   35 # NOTE 1: We can't use statist-1.0.2 to "export a file" with the columns
   36 # because the rows with missing values will be dislocated. Example:
   37 
   38 # 34 44                     34 44
   39 # 35 M       will become    35 53
   40 # 36 53                     36 .
   41 
   42 # As you can see, the "53" is no longer aligned with "36".
   43 
   44 # NOTE 2: statist-1.0.2 would be unable to read the two columns that it had
   45 # just exported due to the "dot" at the third column. The correct (for statist)
   46 # would be to put a "M" in the second line, not a dot at the third one. So,
   47 # let's use awk:
   48 
   49 sed -e 's/age/#%age/' $examples_dir/city.csv > city.dat
   50 awk '{if($2 == "sex") {print "#%sex god"} else if($2 == 1 || $2 == 0) print $2, $6}' city.dat > zero_one.dat
   51 awk '{print $1, $7}' city.dat > two_columns.dat
   52 awk '{print $1, $7, $8}' city.dat > three_columns.dat
   53 awk '{print $1, $2, $4, $7, $8}' city.dat > five_columns.dat
   54 
   55 
   56 # Now we'll run statist-1.0.2 six times.
   57 
   58 # FIRST: We'll use two_columns.dat to run all menu items that require two
   59 # columns with exactly the same number of rows. Note: In a real research we
   60 # wouldn't have this privilege. We should use awk to save all combinations of
   61 # two columns that we were interested in.
   62 
   63 # SECOND: We'll use three_columns.dat to run all menu items that require
   64 # three columns with exactly the same number of rows.
   65 
   66 # THIRD: We'll run partial linear correlation with 5 items.
   67 
   68 # FORTH: We'll run Chi^2-fourfold-test (zero_one.dat).
   69 
   70 # FIFTH: We'll run all remaining menu items.
   71 
   72 # SIXTH: We'll run the probit analysis.
   73 
   74 # menu choices for the first run:
   75 echo '2
   76 1
   77 age
   78 deg
   79 2
   80 age
   81 deg
   82 5
   83 age
   84 deg
   85 5
   86 0
   87 3
   88 2
   89 age
   90 deg
   91 8
   92 age
   93 deg
   94 0
   95 0
   96 ' > run_it
   97 
   98 $statist_102 -delrow -silent -noplot two_columns.dat < run_it > st102a
   99 
  100 
  101 # menu choices for the second run (three_columns):
  102 echo '2
  103 3
  104 3
  105 inc
  106 deg
  107 age
  108 9
  109 3
  110 inc
  111 deg
  112 age
  113 10
  114 3
  115 inc
  116 deg
  117 age
  118 5
  119 0
  120 3
  121 5
  122 3
  123 inc
  124 deg
  125 age
  126 0
  127 0
  128 ' > run_it
  129 
  130 $statist_102 -delrow -silent -noplot three_columns.dat < run_it > st102b
  131 
  132 
  133 # menu choices for the third run (five_columns):
  134 echo '2
  135 4
  136 5
  137 inc
  138 age
  139 deg
  140 hap
  141 sex
  142 6
  143 5
  144 inc
  145 age
  146 deg
  147 hap
  148 sex
  149 7
  150 5
  151 inc
  152 age
  153 deg
  154 hap
  155 sex
  156 0
  157 0
  158 ' > run_it
  159 
  160 $statist_102 -silent -delrow -noplot five_columns.dat < run_it > st102c
  161 
  162 
  163 # menu choices for the fourth run (Chi^2):
  164 echo '3
  165 4
  166 sex
  167 god
  168 0
  169 0
  170 ' > run_it
  171 
  172 $statist_102 -silent -delrow -noplot zero_one.dat < run_it > st102d
  173 
  174 
  175 # menu choices for the fifth run (city.dat):
  176 echo '2
  177 8
  178 sex
  179 age
  180 0
  181 3
  182 1
  183 deg
  184 inc
  185 6
  186 deg
  187 inc
  188 7
  189 3
  190 deg
  191 inc
  192 age
  193 9
  194 deg
  195 10
  196 sex
  197 god
  198 0
  199 4
  200 1
  201 age
  202 
  203 
  204 
  205 3
  206 inc
  207 n
  208 4
  209 age
  210 0
  211 0' > run_it
  212 
  213 $statist_102 -silent -noplot city.dat < run_it > st102e
  214 
  215 # menu choices for the sixth run (probit):
  216 echo '4
  217 2
  218 y
  219 dose
  220 ef
  221 2
  222 N
  223 dos
  224 num
  225 ef
  226 0
  227 0
  228 ' > run_it
  229 
  230 $statist_102 -silent -noplot $examples_dir/probit.dat < run_it > st102f
  231 
  232 cat st102* > result_statist-1.0.2
  233 rm st102*
  234 
  235 
  236 ##########################################################################
  237 # Now, finally, we'll write the menu choices for statist-1.4.1 and run it.
  238 # Note that statist-1.4.1 doesn't have (and doesn't need) the option -delrow.
  239 
  240 echo '2
  241 1
  242 age
  243 deg
  244 2
  245 age
  246 deg
  247 5
  248 age
  249 deg
  250 5
  251 0
  252 3
  253 2
  254 age
  255 deg
  256 8
  257 age
  258 deg
  259 0
  260 2
  261 3
  262 inc
  263 deg
  264 age
  265 
  266 9
  267 inc
  268 deg
  269 age
  270 
  271 10
  272 inc
  273 deg
  274 age
  275 
  276 5
  277 0
  278 3
  279 5
  280 inc
  281 deg
  282 age
  283 
  284 y
  285 0
  286 2
  287 4
  288 inc
  289 age
  290 deg
  291 hap
  292 sex
  293 6
  294 inc
  295 age
  296 deg
  297 hap
  298 sex
  299 
  300 7
  301 inc
  302 age
  303 deg
  304 hap
  305 sex
  306 
  307 0
  308 3
  309 4
  310 sex
  311 god
  312 0
  313 2
  314 8
  315 sex
  316 age
  317 0
  318 3
  319 1
  320 deg
  321 inc
  322 6
  323 deg
  324 inc
  325 7
  326 deg
  327 inc
  328 age
  329 
  330 9
  331 deg
  332 10
  333 sex
  334 god
  335 0
  336 4
  337 1
  338 age
  339 
  340 
  341 
  342 3
  343 inc
  344 n
  345 4
  346 age
  347 0
  348 0' > run_it
  349 
  350 $statist_141 --na-string "M" --noplot --silent city.dat < run_it > st140a
  351 
  352 
  353 # menu choices for probit:
  354 echo '4
  355 2
  356 y
  357 dose
  358 ef
  359 2
  360 
  361 dos
  362 num
  363 ef
  364 0
  365 0
  366 ' > run_it
  367 
  368 
  369 $statist_141 --noplot --silent $examples_dir/probit.dat < run_it > st140b
  370 
  371 cat st140* > result_statist-1.4.1
  372 
  373 rm st140* city.dat run_it five_columns.dat three_columns.dat two_columns.dat zero_one.dat
  374 
  375 # Note: It's expected different results for "Partial linear correlation" with 5
  376 # variables because statist-1.0.2 was using an unintialized variable. This bug
  377 # was fixed in statist-1.3.1 (thanks to valgrind).
  378 
  379 echo
  380 echo "The results were saved in the files"
  381 echo "result_statist-1.0.2 and result_statist-1.4.1."
  382 echo
  383 exit 0
  384 
  385