"Fossies" - the Fresh Open Source Software Archive 
Member "speech_tools/ling_class/relation_io.cc" (11 Sep 2017, 16478 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor updated by awb */
34 /* Date : Feb 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class file i/o, label files */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <fstream>
42 #include "EST_unix.h"
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "EST_string_aux.h"
46 #include "EST_cutils.h"
47 #include "EST_TList.h"
48 #include "EST_Option.h"
49 #include "relation_io.h"
50
51 #define DEF_SAMPLE_RATE 16000
52 #define HTK_UNITS_PER_SECOND 10000000
53
// Pattern matching any string whose first character is a space, tab,
// newline or carriage return.  load_esps_label() uses it to decide whether
// a header value needs its leading whitespace stripped.
54 static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");
55
// Forward declaration: read_label_portion() is defined further down in this
// file but is called by load_ogi_label() before that definition.
56 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
57 int sample);
58
59 EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
60 {
61 ts.set_SingleCharSymbols(";");
62 ts.set_quotes('"','\\');
63 EST_String key, val;
64
65 // Skip the header
66 while (!ts.eof())
67 {
68 key = ts.get().string();
69 if (key == "#")
70 break;
71
72 val = ts.get_upto_eoln().string();
73 // delete leading whitespace
74 if (val.matches(RXleadingwhitespace))
75 val = val.after(RXwhite);
76 rel.f.set(key, val);
77 }
78
79 if (ts.peek() == "") return format_ok;
80
81 while (!ts.eof())
82 {
83 EST_Item *si = rel.append();
84 EST_String name;
85
86 si->set("end",(float)atof(ts.get().string()));
87 ts.get(); // skip the color;
88
89 for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
90 {
91 EST_Token &t = ts.get();
92 if (name.length() > 0) // preserve internal whitespace
93 name += t.whitespace();
94 name += t.string();
95 }
96 si->set_name(name);
97
98 if (ts.peek().string() == ";") // absorb separator
99 {
100 ts.get();
101 si->features().load(ts);
102 }
103 }
104 return format_ok;
105 }
106
107 EST_write_status save_esps_label(const EST_String &filename,
108 const EST_Relation &s,
109 bool evaluate_ff)
110 {
111 ostream *outf;
112 if (filename == "-")
113 outf = &cout;
114 else
115 outf = new ofstream(filename);
116
117 if (!(*outf))
118 {
119 cerr << "save_esps_label: can't open label output file \"" <<
120 filename << "\"" << endl;
121 return write_fail;
122 }
123
124 EST_write_status st=save_esps_label(outf, s, evaluate_ff);
125
126 if (outf != &cout)
127 delete outf;
128
129 return st;
130 }
131
132 EST_write_status save_esps_label(ostream *outf,
133 const EST_Relation &s,
134 bool evaluate_ff)
135 {
136 EST_Item *ptr;
137
138 *outf << "separator ;\n";
139 if (!s.f.present("nfields"))
140 *outf << "nfields 1\n";
141
142 EST_Features::Entries p;
143 for (p.begin(s.f); p; ++p)
144 *outf << p->k << " " << p->v << endl;
145
146 *outf << "#\n";
147 /* if (f("timing_style") == "event")
148 *outf << "timing_style event\n";
149 else if (f("timing_style") == "unit")
150 *outf << "timing_style unit\n";
151 */
152
153 for (ptr = s.head(); ptr != 0; ptr = inext(ptr))
154 {
155 *outf << "\t";
156 outf->precision(5);
157 outf->setf(ios::scientific, ios::floatfield);
158 outf->width(8);
159 // outf->fill('0');
160 if (s.f("timing_style","0") == "event")
161 *outf << ptr->F("time",0);
162 else
163 *outf << ptr->F("end",0);
164
165 *outf << " 26 \t" << ptr->S("name","0");
166
167 EST_Features f2;
168 f2 = ptr->features();
169 f2.remove("name");
170 f2.remove("end");
171 if (evaluate_ff)
172 evaluate(ptr,f2);
173
174 if (f2.length() > 0)
175 {
176 *outf << " ; ";
177 f2.save(*outf);
178 }
179 *outf << endl;
180 }
181
182 return write_ok;
183 }
184
185 EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
186 {
187 // This function reads OGI style label files. The start, end
188 // time and names of the labels are mandatory.
189 EST_String key, val;
190 float sr;
191 int isr;
192
193 // set up the character constant values for this stream
194 ts.set_SingleCharSymbols(";");
195
196 // Skip over header
197
198 while(!ts.eof())
199 {
200 if ((ts.peek().col() == 0) && (ts.peek() == "END"))
201 {
202 if (ts.peek() == "END")
203 { // read rest of header
204 ts.get();
205 ts.get();
206 ts.get();
207 }
208 break;
209 }
210 key = ts.get().string();
211 val = ts.get().string();
212 }
213
214 sr = 1000.0 / atof(val);
215 isr = (int)sr;
216
217 if (ts.eof())
218 {
219 cerr << "Error: couldn't find header in label file "
220 << ts.filename() << endl;
221 return wrong_format;
222 }
223
224 if (read_label_portion(ts, s, isr) == misc_read_error)
225 {
226 cerr << "error: in label file " << ts.filename() << " at line " <<
227 ts.linenum() << endl;
228 return misc_read_error;
229 }
230 return format_ok;
231 }
232
233 EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
234 {
235 // This function reads label files in the form of simple word strings
236 // with no timing information.
237 EST_Item *item;
238
239 while (!ts.eof())
240 {
241 item = s.append();
242 item->set("name",(EST_String)ts.get());
243 item->set("end",0.0);
244 }
245
246 return format_ok;
247 }
248
// Convert the decimal count in `s` to a time by dividing it by `sample`.
//
// Strings shorter than 15 characters are converted with atof().  Longer
// strings (e.g. times counted in 100 nanosecond units) are accumulated
// digit by digit in a double, dividing each digit by `sample` as it is
// added.  On that path leading spaces, tabs, newlines and carriage returns
// are skipped and conversion stops at the first non-digit character.
static float convert_long_num_string_to_time(const char *s,int sample)
{
    if (strlen(s) < 15)
        return atof(s)/sample;

    const char *p = s;
    double total = 0;

    // skip leading whitespace
    while ((*p == ' ') || (*p == '\n') || (*p == '\r') || (*p == '\t'))
        p++;

    // fold in the run of decimal digits, already scaled by 1/sample
    while ((*p >= '0') && (*p <= '9'))
    {
        double digit = *p - '0';
        total = total*10;
        total += (digit/(double)sample);
        p++;
    }

    return total;
}
276
277 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
278 int sample)
279 {
280 EST_Item *item;
281 float hend;
282 EST_String str;
283
284 while(!ts.eof())
285 {
286 str = ts.get().string();
287 if (str == ".")
288 return format_ok;
289
290 item = s.append();
291
292 str = ts.get().string();
293 hend = convert_long_num_string_to_time(str,sample);
294
295 item->set("end",hend); // time
296 item->set("name",ts.get().string()); // name
297
298 if (!ts.eoln())
299 item->set("rest_lab",ts.get_upto_eoln().string());
300 }
301
302 return format_ok;
303 }
304
305 EST_read_status load_sample_label(EST_TokenStream &ts,
306 EST_Relation &s, int sample)
307 {
308
309 if (sample == 0) // maybe this should be an error
310 sample = DEF_SAMPLE_RATE;
311
312 // set up the character constant values for this stream
313 ts.set_SingleCharSymbols(";");
314
315 s.clear();
316 if (read_label_portion(ts, s, sample) == misc_read_error)
317 {
318 cerr << "error: in label file " << ts.filename() << " at line " <<
319 ts.linenum() << endl;
320 return misc_read_error;
321 }
322 return format_ok;
323 }
324
325 EST_write_status save_htk_label(const EST_String &filename,
326 const EST_Relation &a)
327 {
328 ostream *outf;
329 if (filename == "-")
330 outf = &cout;
331 else
332 outf = new ofstream(filename);
333
334 if (!(*outf))
335 {
336 cerr << "save_htk_label: can't open label output file \"" <<
337 filename << "\"" << endl;
338 return write_fail;
339 }
340
341 EST_write_status s = save_htk_label(outf, a);
342
343
344 if (outf != &cout)
345 delete outf;
346
347 return s;
348 }
349
350 EST_write_status save_htk_label(ostream *outf,
351 const EST_Relation &a)
352 {
353 EST_Item *ptr;
354 float end,start;
355
356 outf->precision(6);
357
358 start = end = 0;
359 for (ptr = a.head(); ptr != 0; ptr = inext(ptr))
360 {
361 outf->width(15);
362 cout.setf(ios::left,ios::adjustfield);
363 *outf << (int)(start * HTK_UNITS_PER_SECOND);
364 outf->width(15);
365 end = ptr->F("end",0.0);
366 *outf << (int)(end * HTK_UNITS_PER_SECOND);
367 *outf << " " << ptr->name() << endl;
368 start = end;
369 }
370
371 return write_ok;
372 }
373
// Everything between this #if 0 and the matching #endif is excluded from
// compilation.  It holds two label writers, save_label_spn() and
// save_label_names(), which are declared with EST_Stream_Item pointers.
374 #if 0
// save_label_spn: writes one line per item holding the item name and
// dur() * 1000.0.  The line written before the loop carries the suffix
// "\t (0,140)" and the line written after it carries "\t (99,80)".
// NOTE(review): the loop starts from the head item that has already been
// written above it, so the head looks as if it is emitted twice -- confirm
// before re-enabling.  The stream allocated with new is also not released
// on the write_fail path.
375 EST_write_status save_label_spn(const EST_String &filename,
376 const EST_Relation &a)
377 {
378 EST_Stream_Item *ptr;
379
380 ostream *outf;
381 if (filename == "-")
382 outf = &cout;
383 else
384 outf = new ofstream(filename);
385
386 if (!(*outf))
387 {
388 cerr << "save_label_spn: can't open label output file \""
389 << filename << "\"" << endl;
390 return write_fail;
391 }
392
393 ptr = a.head();
394 outf->precision(3);
395 outf->setf(ios::left, ios::adjustfield);
396 outf->width(8);
397 *outf << ptr->name();
398 outf->setf(ios::scientific, ios::floatfield);
399 outf->width(8);
400 *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;
401
402 for (; inext(ptr) != 0; ptr = inext(ptr))
403 {
404 outf->precision(3);
405 outf->setf(ios::left, ios::adjustfield);
406 outf->width(8);
407 *outf << ptr->name();
408 outf->setf(ios::scientific, ios::floatfield);
409 outf->width(8);
410 *outf << (ptr->dur() * 1000.0) << endl;
411 }
412 // outf->precision(3);
413 // outf->setf(ios::left, ios::adjustfield);
414 outf->width(8);
415 *outf << ptr->name();
416 outf->setf(ios::scientific, ios::floatfield);
417 outf->width(8);
418 *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;
419
420 if (outf != &cout)
421 delete outf;
422
423 return write_ok;
424 }
425
// save_label_names: writes the item names only.  Names are separated by a
// newline when `features` is neither empty nor "OneLine", and by a single
// space otherwise; the last name is always followed by a newline.
// NOTE(review): ptr->name() after the loop is reached without a null check,
// and the stream is not released on the misc_write_error path.
426 EST_write_status save_label_names(const EST_String &filename,
427 const EST_Relation &a,
428 const EST_String &features)
429 {
430 EST_Stream_Item *ptr;
431
432 ostream *outf;
433 if (filename == "-")
434 outf = &cout;
435 else
436 outf = new ofstream(filename);
437
438 if (!(*outf))
439 {
440 cerr << "save_label_name: can't open label output file \""
441 << filename << "\"" << endl;
442 return misc_write_error;
443 }
444
445 for (ptr = a.head(); inext(ptr) != 0; ptr = inext(ptr))
446 {
447 *outf << ptr->name();
448 if ((features != "") && (features != "OneLine"))
449 *outf << endl;
450 else
451 *outf << " ";
452 }
453
454 *outf << ptr->name() << endl;
455
456 if (outf != &cout)
457 delete outf;
458 return write_ok;
459 }
460 #endif
461
462 EST_write_status save_RelationList(const EST_String &filename,
463 const EST_RelationList &plist,
464 int time, int path)
465 {
466 EST_Litem *p;
467 EST_Item *ptr;
468 EST_String outname;
469 float start,end;
470
471 ostream *outf;
472 if (filename == "-")
473 outf = &cout;
474 else
475 outf = new ofstream(filename);
476
477 if (!(*outf))
478 {
479 cerr << "save_StreamList: can't open MLF output file \""
480 << filename << "\"\n";
481 return write_fail;
482 }
483
484 *outf << "#!MLF!#\n"; // MLF header/identifier
485 outf->precision(6);
486
487 start = end = 0;
488 for (p = plist.head(); p != 0; p = p->next())
489 {
490 outname = path ? plist(p).name() : basename(plist(p).name());
491 *outf << "\"*/" << outname<<"\"\n";
492 for (ptr = plist(p).head(); ptr != 0; ptr = inext(ptr))
493 {
494 if (time)
495 {
496 outf->width(15);
497 cout.setf(ios::left,ios::adjustfield);
498 *outf << (int)(start * HTK_UNITS_PER_SECOND);
499 outf->width(15);
500 end = ptr->F("end",0.0);
501 *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
502 start = end;
503 }
504 *outf << ptr->S("name","0") << endl;
505 }
506 *outf << ".\n";
507 }
508
509 if (outf != &cout)
510 delete outf;
511 return write_ok;
512 }
513
514 EST_write_status save_WordList(const EST_String &filename,
515 const EST_RelationList &plist,
516 int style)
517 {
518 EST_Litem *p;
519 EST_Item *ptr;
520
521 ostream *outf;
522 if (filename == "-")
523 outf = &cout;
524 else
525 outf = new ofstream(filename);
526
527 if (!(*outf))
528 {
529 cerr << "save:WordList: can't open WordList output file \""
530 << filename << "\"\n";
531 return write_fail;
532 }
533
534 for (p = plist.head(); p != 0; p = p->next())
535 {
536 for (ptr = plist(p).head(); inext(ptr) != 0; ptr = inext(ptr))
537 {
538 *outf << ptr->name();
539 if (style == 0)
540 *outf << endl;
541 else
542 *outf << " ";
543 }
544 if (ptr != 0)
545 *outf << ptr->name() << endl;
546 }
547
548 if (outf != &cout)
549 delete outf;
550 return write_ok;
551 }
552
553 EST_write_status save_ind_RelationList(const EST_String &filename,
554 const EST_RelationList &plist,
555 const EST_String &features,
556 int path)
557 {
558 EST_Litem *p;
559 EST_String outname;
560 (void) filename;
561 (void) features;
562
563 for (p = plist.head(); p != 0; p = p->next())
564 {
565 outname = path ? plist(p).name() : basename(plist(p).name());
566 if (plist(p).save(outname,false) != write_ok)
567 return misc_write_error;
568 }
569
570 return write_ok;
571 }
572
573 EST_read_status load_RelationList(const EST_String &filename,
574 EST_RelationList &plist)
575 {
576 EST_TokenStream ts;
577 EST_String fns, name;
578
579 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
580 {
581 cerr << "Can't open label input file " << filename << endl;
582 return misc_read_error;
583 }
584 // set up the character constant values for this stream
585 ts.set_SingleCharSymbols(";");
586
587 // Skip over header
588 if (ts.get().string() != "#!MLF!#")
589 {
590 cerr << "Not MLF file\n";
591 return wrong_format;
592 }
593
594 while(!ts.eof())
595 {
596 // put filename in as stream name. The filename is usually surrounded
597 // by quotes, so remove these.
598 fns = ts.get().string();
599 strip_quotes(fns);
600 EST_Relation s(fns);
601 s.f.set("name", fns); // simonk
602 plist.append(s);
603
604 if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
605 {
606 cerr << "error: in reading MLF file\n";
607 cerr << "section for file " << fns <<
608 " at line " << ts.linenum() << " is badly formatted\n";
609
610 return misc_read_error;
611 }
612 }
613
614 return format_ok;
615 }
616
617 static void pad_ends(EST_Relation &s, float length)
618 {
619 // add evenly spaced dummy end values to Relation
620 EST_Item *p;
621 int i;
622
623 for (i = 0, p = s.head(); p; p = inext(p), ++i)
624 p->set("end",(length * float(i)/float(s.length())));
625 }
626
627 EST_read_status read_RelationList(EST_RelationList &plist,
628 EST_StrList &files, EST_Option &al)
629 {
630 EST_Litem *p, *plp;
631
632 if (al.val("-itype", 0) == "mlf")
633 {
634 if (load_RelationList(files.first(), plist) != format_ok)
635 exit (-1);
636 }
637 else
638 for (p = files.head(); p; p = p->next())
639 {
640 EST_Relation s(files(p));
641 plist.append(s);
642 plp = plist.tail();
643 if (al.present("-itype"))
644 {
645 if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
646 exit (-1);
647 }
648 else if (plist(plp).load(files(p)) != format_ok)
649 exit (-1);
650 if ((al.val("-itype", 0) == "words") && (al.present("-length")))
651 pad_ends(s, al.fval("-length"));
652
653 }
654
655 return format_ok;
656 }