"Fossies" - the Fresh Open Source Software Archive 
Member "statist-1.4.2/tools/run_comparison.sh" (10 Sep 2006, 5182 Bytes) of package /linux/privat/old/statist-1.4.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Bash source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
#!/bin/bash

# This file is part of statist
#
# It is distributed under the GNU General Public License.
# See the file COPYING for details.
#
# (C) 2006 Jakson Alves de Aquino <jalvesaq@gmail.com>
#
# $Id: run_comparison.sh,v 1.3 2006/09/10 02:42:12 jakson Exp $

# Warning: This script is the worst possible example of how to do statistics.
# The analyses carried out here have the sole purpose of comparing two
# different versions of statist.

# This script runs two different versions of statist to check whether the
# new version still produces the correct results. The old version is the one
# tagged as "bernhard-last-maint-2005" (here called $statist_102) and the new
# one is 1.4.1. You have to compile these versions of statist and write
# the paths to them in the variables below. The path to the statist "examples"
# subdirectory is necessary because the files city.csv and probit.dat are used.
#
# Output: result_statist-1.0.2 and result_statist-1.4.1 in the current
# directory. Intermediate files (city.dat, *_columns.dat, zero_one.dat, run_it,
# st102*, st140*) are created in the current directory and removed again.
#
# Exit status: 0 on completion, 1 if a prerequisite is missing.

statist_102="./statist-1.0.2"
statist_141="../src/statist"
examples_dir="../examples"

# Force the C locale for both statist runs so that their output is comparable.
# The variables must be exported: a plain assignment only reaches the child
# processes for variables that were already present in the environment.
export LANG=C
export LANGUAGE=C
export LC_ALL=C

# Print an error message on stderr and abort with exit status 1.
# Arguments: the words of the message.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Fail early instead of silently producing empty result files when one of the
# two binaries or one of the example data files is missing.
for prog in "$statist_102" "$statist_141"; do
  command -v "$prog" > /dev/null 2>&1 || die "cannot execute '$prog'"
done
for data in "$examples_dir/city.csv" "$examples_dir/probit.dat"; do
  [[ -r "$data" ]] || die "cannot read '$data'"
done

# Before running statist-1.0.2 we must create four new data files to run
# the analyses that require the same number of rows. Statist-1.0.2 has the
# option -delrow, but, alone, it isn't enough.

# NOTE 1: We can't use statist-1.0.2 to "export a file" with the columns
# because the rows with missing values will be dislocated. Example:

#   34 44                 34 44
#   35 M    will become   35 53
#   36 53                 36 .

# As you can see, the "53" is no longer aligned with "36".

# NOTE 2: statist-1.0.2 would be unable to read the two columns that it had
# just exported due to the "dot" at the third column. The correct approach
# (for statist) would be to put an "M" in the second line, not a dot at the
# third one. So, let's use awk:

# city.dat: copy of city.csv in which the first "age" of each line is
# prefixed with "#%" (this marks the header line that starts with "age").
sed -e 's/age/#%age/' "$examples_dir/city.csv" > city.dat
# zero_one.dat: fields 2 and 6 of the rows whose field 2 is 0 or 1, under a
# "#%sex god" header line.
awk '{if($2 == "sex") {print "#%sex god"} else if($2 == 1 || $2 == 0) print $2, $6}' city.dat > zero_one.dat
# Column subsets of city.dat (header line included) for the runs that need
# two, three and five columns.
awk '{print $1, $7}' city.dat > two_columns.dat
awk '{print $1, $7, $8}' city.dat > three_columns.dat
awk '{print $1, $2, $4, $7, $8}' city.dat > five_columns.dat


# Now we'll run statist-1.0.2 six times.

# FIRST: We'll use two_columns.dat to run all menu items that require two
# columns with exactly the same number of rows. Note: In real research we
# wouldn't have this luxury. We would have to use awk to save all combinations
# of two columns that we were interested in.

# SECOND: We'll use three_columns.dat to run all menu items that require
# three columns with exactly the same number of rows.

# THIRD: We'll run partial linear correlation with 5 items.

# FOURTH: We'll run Chi^2-fourfold-test (zero_one.dat).

# FIFTH: We'll run all remaining menu items.

# SIXTH: We'll run the probit analysis.

# Each run writes its menu answers, one per line, to the file run_it and
# feeds that file to statist on standard input.

# menu choices for the first run:
echo '2
1
age
deg
2
age
deg
5
age
deg
5
0
3
2
age
deg
8
age
deg
0
0
' > run_it

"$statist_102" -delrow -silent -noplot two_columns.dat < run_it > st102a


# menu choices for the second run (three_columns):
echo '2
3
3
inc
deg
age
9
3
inc
deg
age
10
3
inc
deg
age
5
0
3
5
3
inc
deg
age
0
0
' > run_it

"$statist_102" -delrow -silent -noplot three_columns.dat < run_it > st102b


# menu choices for the third run (five_columns):
echo '2
4
5
inc
age
deg
hap
sex
6
5
inc
age
deg
hap
sex
7
5
inc
age
deg
hap
sex
0
0
' > run_it

"$statist_102" -silent -delrow -noplot five_columns.dat < run_it > st102c


# menu choices for the fourth run (Chi^2):
echo '3
4
sex
god
0
0
' > run_it

"$statist_102" -silent -delrow -noplot zero_one.dat < run_it > st102d


# menu choices for the fifth run (city.dat):
echo '2
8
sex
age
0
3
1
deg
inc
6
deg
inc
7
3
deg
inc
age
9
deg
10
sex
god
0
4
1
age



3
inc
n
4
age
0
0' > run_it

"$statist_102" -silent -noplot city.dat < run_it > st102e

# menu choices for the sixth run (probit):
echo '4
2
y
dose
ef
2
N
dos
num
ef
0
0
' > run_it

"$statist_102" -silent -noplot "$examples_dir/probit.dat" < run_it > st102f

# Concatenate the six partial outputs (st102a .. st102f, in glob order) into
# the final result file for the old version.
cat st102* > result_statist-1.0.2
rm -- st102*


##########################################################################
# Now, finally, we'll write the menu choices for statist-1.4.1 and run it.
# Note that statist-1.4.1 doesn't have (and doesn't need) the option -delrow.

echo '2
1
age
deg
2
age
deg
5
age
deg
5
0
3
2
age
deg
8
age
deg
0
2
3
inc
deg
age

9
inc
deg
age

10
inc
deg
age

5
0
3
5
inc
deg
age

y
0
2
4
inc
age
deg
hap
sex
6
inc
age
deg
hap
sex

7
inc
age
deg
hap
sex

0
3
4
sex
god
0
2
8
sex
age
0
3
1
deg
inc
6
deg
inc
7
deg
inc
age

9
deg
10
sex
god
0
4
1
age



3
inc
n
4
age
0
0' > run_it

# city.dat marks missing values with "M", hence --na-string.
"$statist_141" --na-string "M" --noplot --silent city.dat < run_it > st140a


# menu choices for probit:
echo '4
2
y
dose
ef
2

dos
num
ef
0
0
' > run_it


"$statist_141" --noplot --silent "$examples_dir/probit.dat" < run_it > st140b

# Concatenate the two partial outputs into the final result file for the new
# version, then remove every intermediate file created by this script.
cat st140* > result_statist-1.4.1

rm -- st140* city.dat run_it five_columns.dat three_columns.dat two_columns.dat zero_one.dat

# Note: Different results are expected for "Partial linear correlation" with 5
# variables because statist-1.0.2 was using an uninitialized variable. This bug
# was fixed in statist-1.3.1 (thanks to valgrind).

echo
echo "The results were saved in the files"
echo "result_statist-1.0.2 and result_statist-1.4.1."
echo
exit 0