"Fossies" - the Fresh Open Source Software Archive 
Member "speech_tools/base_class/EST_FeatureData.cc" (11 Sep 2017, 13777 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* */
34 /* Author: Paul Taylor Caley */
35 /* Date: July 1998 */
36 /* -------------------------------------------------------------------- */
37 /* Feature Data Class */
38 /* */
39 /************************************************************************/
40
41 #include "EST_TMatrix.h"
42 #include "EST_Val.h"
43 #include "EST_FeatureData.h"
44 #include "EST_string_aux.h"
45 #include "EST_Token.h"
46 #include "EST_FileType.h"
47 #include "EST_error.h"
48 #include <iostream>
49 #include <fstream>
50
51 #include "EST_THash.h"
52
53
54 EST_FeatureData::EST_FeatureData()
55 {
56 default_vals();
57 }
58
59
60
61 EST_FeatureData::EST_FeatureData(const EST_FeatureData &a)
62 {
63 default_vals();
64 copy(a);
65 }
66
67 EST_FeatureData::~EST_FeatureData(void)
68 {
69 }
70
71 int EST_FeatureData::num_samples() const
72 {
73 return fd.num_rows();
74 }
75
76 int EST_FeatureData::num_features() const
77 {
78 return fd.num_columns();
79 }
80
81
82 void EST_FeatureData::default_vals()
83 {
84 /* cout << "Default values\n";
85 p_sub_fd = false;
86 p_info = new EST_FeatureInfo;
87 */
88 }
89
90 void EST_FeatureData::set_num_samples(int num_samples, bool preserve)
91 {
92 fd.resize(num_samples, fd.num_columns(), preserve);
93 }
94
95 void EST_FeatureData::resize(int num_samples, int num_features, bool preserve)
96 {
97 // If enlargement is required, give new features dummy names
98 // and set their types to <STRING>. If preserve is set to 0
99 // rename all features this way.
100
101 if (num_features > fd.num_columns())
102 {
103 int i;
104 if (preserve)
105 i = fd.num_columns();
106 else
107 i = 0;
108 for (; i < num_features; ++i)
109 info.set("unnamed_" + itoString(i), "<STRING>");
110 }
111
112 fd.resize(num_samples, num_features, preserve);
113 }
114
115 void EST_FeatureData::resize(int num_samples, EST_Features &f, bool preserve)
116 {
117 fd.resize(num_samples, f.length(), preserve);
118 info = f;
119 }
120
121 EST_String EST_FeatureData::type(const EST_String &feature_name)
122 {
123 EST_String t = info.S(feature_name);
124
125 if (t.contains("<", 0)) // i.e. a predefined type
126 return t;
127
128 return "undef";
129 }
130
131 EST_StrList EST_FeatureData::values(const EST_String &feature_name)
132 {
133 EST_StrList v;
134 EST_String t = info.S(feature_name);
135
136 // check for infinite set:
137 if ((t == "<FLOAT>") || (t == "<INT>") || (t == "<STRING>"))
138 return v;
139
140 StringtoStrList(t, v);
141 return v;
142 }
143
144 int EST_FeatureData::feature_position(const EST_String &feature_name)
145 {
146 int i;
147
148 EST_Features::Entries p;
149
150 for (i = 0, p.begin(info); p; ++p, ++i)
151 {
152 // cout << "looking at " << info.fname(p) << endl;
153 // cout << "i = " << i << endl;
154 if (p->k == feature_name)
155 return i;
156 }
157
158 EST_error("No such feature %s\n", (const char *) feature_name);
159 return 0;
160 }
161
162 int EST_FeatureData::update_values(const EST_String &feature_name, int max)
163 {
164 // This should be converted back to Hash tables once extra
165 // iteration functions are added the EST_Hash.
166 int i, col;
167 EST_Features values;
168 EST_String v;
169
170 // EST_TStringHash<int> values(max);
171
172 col = feature_position(feature_name);
173
174 for (i = 0; i < num_samples(); ++i)
175 values.set(fd.a(i, col).string(), 1);
176
177 // check to see if there are more types than allowed, if so
178 // just set to open set STRING
179 if (values.length() > max)
180 v = "<STRING>";
181 else
182 {
183 EST_Features::Entries p;
184 for(p.begin(values); p; ++p)
185 v += p->k + " ";
186 }
187
188 info.set(feature_name, v);
189
190 return values.length();
191 }
192
193 EST_FeatureData & EST_FeatureData::copy(const EST_FeatureData &a)
194 {
195 (void) a;
196 /* // copy on a sub can't alter header information
197 if (!p_sub_fd)
198 {
199 delete p_info;
200 *p_info = *(a.p_info);
201 }
202 // but data can be copied so long as no resizing is involved.
203 EST_ValMatrix::operator=(a);
204 */
205 return *this;
206 }
207
208 /*void EST_FeatureData::a(int i, int j)
209 {
210 return EST_ValMatrix::a(i, j);
211 }
212 */
213 /*
214 EST_Val &EST_FeatureData::operator()(int i, int j)
215 {
216 return a(i, j);
217 }
218
219 EST_Val &EST_FeatureData::operator()(int s, const EST_String &f)
220 {
221 int i = info().field_index(f);
222 return a(s, i);
223 }
224
225 EST_FeatureData &EST_FeatureData::operator=(const EST_FeatureData &f)
226 {
227 return copy(f);
228 }
229
230 */
231 EST_Val &EST_FeatureData::a(int i, const EST_String &f)
232 {
233 (void)f;
234 return fd.a(i, 0);
235 }
236
237 EST_Val &EST_FeatureData::a(int i, int j)
238 {
239 return fd.a(i, j);
240 }
241 const EST_Val &EST_FeatureData::a(int i, const EST_String &f) const
242 {
243 (void)f;
244 return fd.a(i, 0);
245 }
246
247 const EST_Val &EST_FeatureData::a(int i, int j) const
248 {
249 return fd.a(i, j);
250 }
251
252
253 /*
254 void EST_FeatureData::sub_samples(EST_FeatureData &f, int start, int num)
255 {
256 sub_matrix(f, start, num);
257 f.p_info = p_info;
258 f.p_sub_fd = true;
259 }
260
261 void EST_FeatureData::extract_named_fields(const EST_String &fields)
262 {
263 EST_FeatureData n;
264 // there must be a more efficient way than a copy?
265 extract_named_fields(n, fields);
266 *this = n;
267 }
268
269 void EST_FeatureData::extract_named_fields(const EST_StrList &fields)
270 {
271 EST_FeatureData n;
272 // there must be a more efficient way than a copy?
273 extract_named_fields(n, fields);
274 *this = n;
275 }
276
277 void EST_FeatureData::extract_numbered_fields(const EST_String &fields)
278 {
279 EST_FeatureData n;
280 // there must be a more efficient way than a copy?
281 extract_numbered_fields(n, fields);
282 *this = n;
283 }
284
285 void EST_FeatureData::extract_numbered_fields(const EST_IList &fields)
286 {
287 EST_FeatureData n;
288 // there must be a more efficient way than a copy?
289 extract_numbered_fields(n, fields);
290 *this = n;
291 }
292
293
294 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
295 const EST_String &fields) const
296 {
297 EST_StrList s;
298
299 StringtoStrList(fields, s);
300 extract_named_fields(f, s);
301 }
302 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
303 const EST_StrList &n_fields) const
304 {
305 EST_Litem *p;
306 EST_StrList n_types;
307 int i, j;
308
309 info().extract_named_fields(*(f.p_info), n_fields);
310
311 for (p = n_fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
312 for (j = 0; j < f.num_samples(); ++j)
313 f(j, i) = a(j, n_fields(p));
314
315 }
316
317 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
318 const EST_IList &fields) const
319 {
320 EST_Litem *p;
321 EST_StrList n_fields;
322 int i, j;
323
324 for (p = fields.head(); p; p = p->next())
325 n_fields.append(info().field_name(fields(p)));
326
327 info().extract_named_fields(*(f.p_info), n_fields);
328
329 for (p = fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
330 for (j = 0; j < f.num_samples(); ++j)
331 f(j, i) = a(j, fields(p));
332
333 }
334
335 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
336 const EST_String &fields) const
337 {
338 EST_StrList s;
339 EST_IList il;
340
341 StringtoStrList(fields, s);
342 StrListtoIList(s, il);
343 extract_numbered_fields(f, il);
344 }
345 */
346
347 EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)
348 {
349 (void)f;
350 (void)filename;
351 /*
352 ostream *outf;
353 EST_Litem *s, *e;
354 int i;
355 if (filename == "-")
356 outf = &cout;
357 else
358 outf = new ofstream(filename);
359
360 if (!(*outf))
361 return write_fail;
362
363 outf->precision(5);
364 outf->setf(ios::scientific, ios::floatfield);
365 outf->width(8);
366
367 *outf << "EST_File feature_data\n"; // EST header identifier
368 *outf << "DataType ascii\n";
369 *outf << "NumSamples " << f.num_samples() << endl;
370 *outf << "NumFields " << f.num_fields() << endl;
371 *outf << "FieldNames " << f.info().field_names();
372 *outf << "FieldTypes " << f.info().field_types();
373 if (f.info().group_start.length() > 0)
374 for (s = f.info().group_start.head(), e = f.info().group_end.head();
375 s; s = s->next(), e = e->next())
376 *outf << "Group " << f.info().group_start.key(s) << " " <<
377 f.info().group_start.val(s) << " " << f.info().group_end.val(e) << endl;
378
379 for (i = 0; i < f.num_fields(); ++i)
380 if (f.info().field_values(i).length() > 0)
381 *outf << "Field_" << i << "_Values "
382 << f.info().field_values(i) << endl;
383
384 *outf << "EST_Header_End\n"; // EST end of header identifier
385
386 // *outf << ((EST_ValMatrix ) f);
387 *outf << f;
388 */
389
390 return write_ok;
391 }
392
393
394 EST_write_status EST_FeatureData::save(const EST_String &filename,
395 const EST_String &file_type) const
396 {
397 if ((file_type == "est") || (file_type == ""))
398 return save_est(*this, filename);
399 /* else if (file_type = "octave")
400 return save_octave(*this, filename);
401 else if (file_type = "ascii")
402 return save_ascii(*this, filename);
403 */
404
405 cerr << "Can't save feature data in format \"" << file_type << endl;
406 return write_fail;
407 }
408
409
410
411 EST_read_status EST_FeatureData::load(const EST_String &filename)
412 {
413 int i, j;
414 EST_Option hinfo;
415 EST_String k, v;
416 EST_read_status r;
417 bool ascii;
418 EST_TokenStream ts;
419 EST_EstFileType t;
420 int ns, nf;
421
422 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
423 {
424 cerr << "Can't open track file " << filename << endl;
425 return misc_read_error;
426 }
427 // set up the character constant values for this stream
428 ts.set_SingleCharSymbols(";");
429 ts.set_quotes('"','\\');
430
431 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
432 {
433 cerr << "Error reading est header of file " << filename << endl;
434 return r;
435 }
436
437 if (t != est_file_feature_data)
438 {
439 cerr << "Not a EST Feature Data file: " << filename << endl;
440 return misc_read_error;
441 }
442
443 ns = hinfo.ival("NumSamples");
444 nf = hinfo.ival("NumFeatures");
445
446 cout << "ns: " << ns << endl;
447 cout << "nf: " << nf << endl;
448 resize(ns, nf);
449
450 info.clear(); // because resize will make default names
451
452 for (i = 0; i < nf; ++i)
453 {
454 k = "Feature_" + itoString(i+1);
455 if (hinfo.present(k))
456 {
457 v = hinfo.val(k);
458 info.set(v.before(" "), v.after(" "));
459 cout << "value: " << v.after(" ") << endl;
460 }
461 else
462 EST_error("No feature definition given for feature %d\n", i);
463 }
464
465 for (i = 0; i < ns; ++i)
466 {
467 EST_Features::Entries p;
468 for (p.begin(info), j = 0; j < nf; ++j, ++p)
469 {
470 if (p->k == "<FLOAT>")
471 a(i, j) = atof(ts.get().string());
472 else if (p->k == "<BOOL>")
473 a(i, j) = atoi(ts.get().string());
474 else if (p->k == "<INT>")
475 a(i, j) = atoi(ts.get().string());
476 else
477 a(i, j) = ts.get().string();
478 }
479 }
480
481 return format_ok;
482 }
483
484 /*ostream& operator << (ostream &st, const EST_FeatureInfo &a)
485 {
486
487 // st << a.field_names() << endl;
488 // st << a.field_types() << endl;
489
490 return st;
491 }
492 */
493
494 ostream& operator << (ostream &st, const EST_FeatureData &d)
495 {
496 int i, j;
497 EST_String t;
498 EST_Val v;
499
500 // st << a;
501
502 // EST_ValMatrix::operator<<(st, (EST_ValMatrix)a);
503
504 for (i = 0; i < d.num_samples(); ++i)
505 {
506 for (j = 0; j < d.num_features(); ++j)
507 {
508 v = d.a(i, j);
509 st << v << " ";
510 // cout << "field type " << a.info().field_type(j) << endl;
511 /* else if (a.info().field_type(j) == "float")
512 st << a.a(i, j);
513 else if (a.info().field_type(j) == "int")
514 st << a.a(i, j);
515
516 else if (a.info().field_type(j) == "string")
517 {
518 // st << "\"" << a.a(i, j) << "\"";
519 t = a.a(i, j);
520 t.gsub(" ", "_");
521 st << t;
522 }
523 */
524 }
525 st << endl;
526 }
527
528 return st;
529 }