"Fossies" - the Fresh Open Source Software Archive 
Member "speech_tools/ling_class/EST_relation_aux.cc" (4 Sep 2017, 18636 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor and Simon King */
34 /* Date : June 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class auxiliary routines */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cmath>
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "ling_class/EST_relation_aux.h"
46 #include "EST_string_aux.h"
47 #include "EST_io_aux.h"
48 #include "EST_Option.h"
49 #include "EST_Token.h"
50
51 static int is_in_class(const EST_String &name, EST_StrList &s);
52
53 bool dp_match(const EST_Relation &lexical,
54 const EST_Relation &surface,
55 EST_Relation &match,
56 float ins, float del, float sub);
57
58
59 float start(EST_Item *n)
60 {
61 return (iprev(n) == 0) ? 0.0 : iprev(n)->F("end");
62 }
63
64 float duration(EST_Item *n)
65 {
66 return n->F("end") - start(n);
67 }
68
69 void quantize(EST_Relation &a, float q)
70 {
71 EST_Item *a_ptr;
72 float end;
73
74 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
75 {
76 end = a_ptr->F("end") / q;
77 end = rint(end);
78 end = end * q;
79 a_ptr->set("end", end);
80 }
81 }
82
83 // edit labels using a sed file to do the editing
84
85 int edit_labels(EST_Relation &a, EST_String sedfile)
86 {
87 EST_Item *a_ptr;
88 char command[100], name[100], newname[100], sf[100];
89 FILE *fp;
90 strcpy(sf, sedfile);
91 EST_String file1, file2;
92 file1 = make_tmp_filename();
93 file2 = make_tmp_filename();
94
95 fp = fopen(file1, "wb");
96 if (fp == NULL)
97 {
98 fprintf(stderr,"edit_labels: cannot open \"%s\" for writing\n",
99 (const char *)file1);
100 return -1;
101 }
102 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
103 {
104 strcpy(name, a_ptr->name());
105 fprintf(fp, "%s\n", name);
106 }
107 fclose(fp);
108 strcpy(command, "cat ");
109 strcat(command, file1);
110 strcat(command, " | sed -f ");
111 strcat(command, sedfile);
112 strcat(command, " > ");
113 strcat(command, file2);
114
115 printf("command: %s\n", command);
116 system(command);
117
118 fp = fopen(file2, "rb");
119 if (fp == NULL)
120 {
121 fprintf(stderr,"edit_labels: cannot open \"%s\" for reading\n",
122 (const char *)file2);
123 return -1;
124 }
125 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
126 {
127 fscanf(fp, "%s", newname);
128 // cout << "oldname: " << a_ptr->name() << " newname: " << newname << endl;
129 a_ptr->set_name(newname);
130 }
131 fclose(fp);
132 return 0;
133 }
134
135 // make new EST_Relation from start and end points.
136 void extract(const EST_Relation &orig, float s,
137 float e, EST_Relation &ex)
138 {
139 EST_Item *a;
140 EST_Item *tmp;
141
142 for (a = orig.head(); a != 0; a = inext(a))
143 if ((a->F("end") > s) && (start(a) < e))
144 {
145 tmp = ex.append(a);
146 if ((a->F("end") > e))
147 tmp->set("end", e);
148 }
149 }
150
151 void merge_all_label(EST_Relation &seg, const EST_String &labtype)
152 {
153 EST_Item *a_ptr, *n_ptr;
154 (void)labtype; // unused parameter
155
156 for (a_ptr = seg.head(); a_ptr != seg.tail(); a_ptr = n_ptr)
157 {
158 n_ptr = inext(a_ptr);
159 if (a_ptr->name() == inext(a_ptr)->name())
160 seg.remove_item(a_ptr);
161 }
162 }
163
164 void change_label(EST_Relation &seg, const EST_String &oname,
165 const EST_String &nname)
166 {
167 EST_Item *a_ptr;
168
169 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
170 if (a_ptr->name() == oname)
171 a_ptr->set_name(nname);
172 }
173
174 void change_label(EST_Relation &seg, const EST_StrList &oname,
175 const EST_String &nname)
176 {
177 EST_Item *a_ptr;
178 EST_Litem *p;
179
180 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
181 for (p = oname.head(); p ; p = p->next())
182 if (a_ptr->name() == oname(p))
183 a_ptr->set_name(nname);
184 }
185
186 static int is_in_class(const EST_String &name, EST_StrList &s)
187 {
188 EST_Litem *p;
189
190 for (p = s.head(); p; p = p->next())
191 if (name == s(p))
192 return TRUE;
193
194 return FALSE;
195 }
196
197 int check_vocab(EST_Relation &a, EST_StrList &vocab)
198 {
199 EST_Item *s;
200 for (s = a.head(); s; s = inext(s))
201 if (!is_in_class(s->name(), vocab))
202 {
203 cerr<<"Illegal entry in file " <<a.name()<< ":\"" << *s << "\"\n";
204 return -1;
205 }
206 return 0;
207 }
208
209 void convert_to_broad_class(EST_Relation &seg, const EST_String &class_type,
210 EST_Option &options)
211 {
212 // class_type contains a list of whitepsace separated segment names.
213 // This function looks at each segment and adds a feature "pos"
214 // if its name is contained in the list.
215 EST_String tmp_class_type = class_type + "_list";
216 EST_String bc_list(options.val(tmp_class_type, 1));
217 EST_StrList pos_list;
218 EST_TokenStream ts;
219
220 ts.open_string(bc_list);
221 while (!ts.eof())
222 pos_list.append(ts.get().string());
223
224 convert_to_broad(seg, pos_list);
225 }
226
227 void convert_to_broad(EST_Relation &seg, EST_StrList &pos_list,
228 EST_String broad_name, int polarity)
229 {
230 EST_Item *a_ptr;
231 if (broad_name == "")
232 broad_name = "pos";
233
234 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
235 if (is_in_class(a_ptr->name(), pos_list))
236 a_ptr->set(broad_name, (polarity) ? 1 : 0);
237 else
238 a_ptr->set(broad_name, (polarity) ? 0 : 1);
239 }
240
241 void label_map(EST_Relation &seg, EST_Option &map)
242 {
243 EST_Item *p;
244
245 for (p = seg.head(); p != 0; p = inext(p))
246 {
247 if (map.present(p->name()))
248 {
249 if (map.val(p->name()) == "!DELETE")
250 seg.remove_item(p);
251 else
252 p->set_name(map.val(p->name()));
253 }
254
255 }
256 }
257
258 void shift_label(EST_Relation &seg, float shift)
259 {
260 //shift every end time by adding x seconds.
261 EST_Item *a_ptr;
262
263 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
264 a_ptr->set("end", a_ptr->F("end") + shift);
265 }
266
267 void RelationList_select(EST_RelationList &mlf, EST_StrList filenames, bool
268 exact_match)
269 {
270 // select only files in 'filenames'
271 // remove all others from mlf
272 EST_Litem *fptr, *ptr;
273 bool flag;
274
275 // if not exact match, only match basenames
276 EST_StrList tmp_filenames;
277 for (ptr = filenames.head(); ptr != NULL; ptr = ptr->next())
278 if(exact_match)
279 tmp_filenames.append( filenames(ptr) );
280 else
281 tmp_filenames.append( basename(filenames(ptr)) );
282
283 for(fptr=mlf.head(); fptr != NULL;)
284 {
285 flag=false;
286 for (ptr = tmp_filenames.head(); ptr != NULL; ptr = ptr->next())
287 if(exact_match)
288 {
289 if(tmp_filenames(ptr) == mlf(fptr).name())
290 {
291 flag=true;
292 break;
293 }
294 }
295 else if(mlf(fptr).name().contains(tmp_filenames(ptr)))
296 {
297 flag=true;
298 break;
299 }
300
301 if(!flag)
302 {
303 fptr = mlf.remove(fptr);
304
305 if(fptr==0) // must have removed head of list
306 fptr=mlf.head();
307 else
308 fptr=fptr->next();
309 }
310 else
311 fptr=fptr->next();
312 }
313 tmp_filenames.clear();
314 }
315
316 // look for a single file called "filename" and make a EST_Relation out of
317 // this
318 EST_Relation RelationList_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
319 {
320
321 EST_Litem *p;
322 EST_String test, ref;
323
324 if (base)
325 for (p = mlf.head(); p; p = p->next())
326 {
327 if (basename(mlf(p).name(), "*")==basename(filename, "*"))
328 return mlf(p);
329 }
330 else
331 for (p = mlf.head(); p; p = p->next())
332 {
333 if (basename(mlf(p).name()) == filename)
334 return mlf(p);
335 }
336
337 cerr << "No match for file " << filename << " found in mlf\n";
338 EST_Relation d;
339 return d;
340 }
341
342 // combine all relation in MLF into a single relation.
343 EST_Relation RelationList_combine(EST_RelationList &mlf)
344 {
345 EST_Litem *p;
346 EST_Relation all;
347 EST_Item *s, *t = 0;
348 float last = 0.0;
349
350 for (p = mlf.head(); p; p = p->next())
351 {
352 for (s = mlf(p).head(); s; s = inext(s))
353 {
354 t = all.append();
355 t->set("name", s->S("name"));
356 t->set("end", s->F("end") + last);
357 cout << "appended t " << t << endl;
358 }
359 last = (t != 0) ? t->F("end") : 0.0;
360 }
361 return all;
362 }
363
364 EST_Relation RelationList_combine(EST_RelationList &mlf, EST_Relation &key)
365 {
366 EST_Litem *p;
367 EST_Relation all;
368 EST_Item *s, *t = 0, *k;
369 float st;
370
371 if (key.length() != mlf.length())
372 {
373 cerr << "RelationList has " << mlf.length() << " elements: expected "
374 << key.length() << " from key file\n";
375 return all;
376 }
377
378 for (k = key.head(), p = mlf.head(); p; p = p->next(), k = inext(k))
379 {
380 st = start(k);
381 for (s = mlf(p).head(); s; s = inext(s))
382 {
383 t = all.append();
384 t->set("name", s->S("name"));
385 t->set("end", (s->F("end") + st));
386 }
387 }
388 return all;
389 }
390
391 int relation_divide(EST_RelationList &slist, EST_Relation &lab,
392 EST_Relation &keylab,
393 EST_StrList &blank, EST_String ext)
394 { // divides a single relation into multiple chunks according to the
395 // keylab relation. If the keylab boundary falls in the middle of a label,
396 // the label is assigned to the chunk which has the most overlap with
397 // it. Some labels may be specified in the "blank" list which means thy
398 // are duplicated across boundaries.
399
400 EST_Relation a, newkey;
401 EST_Item *s, *k, *t = 0, *n;
402 EST_String filename;
403 float kstart;
404
405 slist.clear();
406
407 if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
408 {
409 cerr << "Key file must extend beyond end of label file\n";
410 return -1;
411 }
412
413 // find a the first keylab that will make a non-empty file
414 for (k = keylab.head(); k ; k = inext(k))
415 if (k->F("end") > lab.head()->F("end"))
416 break;
417
418 filename = (EST_String)k->f("file");
419 a.f.set("name", (filename + ext));
420 kstart = 0.0;
421
422 for (s = lab.head(); s; s = inext(s))
423 {
424 n = inext(s);
425 if (n == 0)
426 {
427 t = a.append(s);
428 t->set("end", (s->F("end") - kstart));
429 break;
430 }
431 if (n->F("end") > k->F("end"))
432 {
433 if (((n->F("end") - k->F("end")) <
434 (k->F("end") - start(n))) ||
435 is_in_class(n->name(), blank))
436 {
437 a.append(s);
438 t->set("end", (s->F("end") - kstart));
439
440 t = a.append(n);
441 t->set("end", (k->F("end") - kstart));
442
443 if (!is_in_class(n->name(), blank))
444 s = inext(s);
445 }
446 else
447 {
448 t = a.append(s);
449 t->set("end", (k->F("end") - kstart));
450 }
451
452 slist.append(a);
453 k = inext(k);
454 kstart = start(k);
455 a.clear();
456 filename = (EST_String)k->f("file");
457 a.f.set("name", (filename + ext));
458 }
459 else
460 {
461 t = a.append(s);
462 t->set("end", (s->F("end") - kstart));
463 }
464 }
465 slist.append(a);
466
467 return 0;
468 }
469
470 int relation_divide2(EST_RelationList &mlf, EST_Relation &lab,
471 EST_Relation &keylab, EST_String ext)
472 {
473 EST_Relation a, newkey;
474 EST_Item *s, *k, *t;
475 float kstart;
476
477 mlf.clear();
478
479 if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
480 {
481 cerr << "Key file must extend beyond end of label file\n";
482 return -1;
483 }
484
485 k = keylab.head();
486 a.f.set("name", (k->name() + ext));
487 kstart = 0.0;
488
489 for (s = lab.head(); s; s = inext(s))
490 {
491 t = a.append();
492 t->set_name(s->name());
493 t->set("end", (s->F("end") - kstart));
494
495 if (s->F("end") > k->F("end"))
496 {
497 cout << "appending " << a;
498 mlf.append(a);
499
500 kstart = s->F("end");
501 k->set("end", (s->F("end")));
502 k = inext(k);
503 a.clear();
504 a.f.set("name", (k->name() + ext));
505 }
506 }
507 cout << "appending " << a;
508 mlf.append(a);
509
510 return 0;
511 }
512
513
514
515
516 void map_match_times(EST_Relation &target, const EST_String &match_name,
517 const EST_String &time_name, bool do_start)
518 {
519 EST_Item *s, *t, *p;
520 float prev_end, inc, first_end, last_end;
521 int i;
522
523 // first pass, copy times as appropriate, and find first
524 // and last defined ends
525 // This is hacky and certainly won't work for many cases
526
527 first_end = -1.0;
528 prev_end = 0.0;
529 last_end = 0.0;
530
531 // cout << "surface: " << surface << endl;
532
533 for (s = target.head(); s; s = inext(s))
534 {
535 if ((t = daughter1(s->as_relation(match_name))) != 0)
536 {
537 s->set(time_name + "end", t->F("end"));
538 if (do_start)
539 s->set(time_name + "start", t->F("start"));
540
541 last_end = t->F("end");
542 if (first_end < 0.0)
543 first_end = t->F("end");
544 }
545 }
546
547 if (!target.head()->f_present(time_name + "end"))
548 {
549 target.head()->set(time_name + "end", first_end / 2.0);
550 if (do_start)
551 target.head()->set(time_name + "start", 0.0);
552 }
553
554 if (!target.tail()->f_present(time_name + "end"))
555 {
556 target.tail()->set(time_name + "end", last_end + 0.01);
557 if (do_start)
558 target.tail()->set(time_name + "start", last_end);
559 }
560
561 for (s = target.head(); s; s = inext(s))
562 {
563 if (!s->f_present(time_name + "end"))
564 {
565 // cout << "missing end feature for " << *s << endl;
566 for (i = 1, p = s; p; p = inext(p), ++i)
567 if (p->f_present(time_name + "end"))
568 break;
569 inc = (p->F(time_name + "end") - prev_end) / ((float) i);
570 // cout << "inc is : " << inc << endl;
571
572 // cout << "stop phone is " << *p << endl;
573
574 for (i = 1; s !=p ; s = inext(s), ++i)
575 {
576 s->set(time_name + "end", (prev_end + ((float) i * inc)));
577 if (do_start)
578 s->set(time_name + "start", (prev_end+((float) (i - 1 )* inc)));
579 }
580 }
581 prev_end = s->F("end");
582 }
583 }
584
585 void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
586 const EST_String &target_name,
587 const EST_String &time_name,
588 bool do_start)
589 {
590 utt.create_relation("Match");
591
592 dp_match(*utt.relation(target_name), *utt.relation(source_name),
593 *utt.relation("Match"), 7.0, 7.0, 7.0);
594
595 map_match_times(*utt.relation(target_name), "Match", time_name, do_start);
596 }
597
598
599 EST_Litem *RelationList_ptr_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
600 {
601 EST_Litem *p;
602 EST_String test, ref;
603
604 if (base)
605 for (p = mlf.head(); p; p = p->next())
606 {
607 if (basename(mlf(p).name(), "*")==basename(filename, "*"))
608 return p;
609 }
610 else
611 for (p = mlf.head(); p; p = p->next())
612 if (mlf(p).name() == filename)
613 return p;
614
615 cerr << "No match for file " << filename << " found in mlf\n";
616 return 0;
617 }
618
619 void relation_convert(EST_Relation &lab, EST_Option &al, EST_Option &op)
620 {
621 if (al.present("-shift"))
622 shift_label(lab, al.fval("-shift"));
623
624 // fix option later.
625 if (al.present("-extend"))
626 al.override_fval("-length",
627 al.fval("-extend",0) * lab.tail()->F("end"));
628
629 // quantize (ie round up or down) label times
630 if (al.present("-q"))
631 quantize(lab, al.fval("-q"));
632
633 if (al.present("-start"))
634 {
635 if (!al.present("-end"))
636 cerr << "-start option must be used with -end option\n";
637 else
638 extract(lab, al.fval("-start"), al.fval("-end"), lab);
639 }
640
641 if (al.present("-class"))
642 convert_to_broad_class(lab, al.val("-class"), op);
643
644 else if (al.present("-pos"))
645 {
646 EST_StrList bclass;
647 StringtoStrList(al.val("-lablist"), bclass);
648 convert_to_broad(lab, bclass);
649 }
650 else if (al.present("-sed"))
651 edit_labels(lab, al.val("-sed"));
652 else if (al.present("-map"))
653 {
654 EST_Option map;
655 if (map.load(al.val("-map")) != format_ok)
656 return;
657 label_map(lab, map);
658 }
659 }
660
661
662
663 void print_relation_features(EST_Relation &stream)
664 {
665 EST_Item *s;
666 EST_Features::Entries p;
667
668 for (s = stream.head(); s; s = inext(s))
669 {
670 cout << s->name() << "\t:";
671 for(p.begin(s->features()); p; ++p)
672 cout << p->k << " "
673 << p->v << "; ";
674 cout << endl;
675 }
676
677 }
678
679
680 void build_RelationList_hash_table(EST_RelationList &mlf,
681 EST_hashedRelationList &hash_table,
682 const bool base)
683 {
684 EST_Litem *p;
685 if (base)
686 for (p = mlf.head(); p; p = p->next())
687 hash_table.add_item(basename(mlf(p).name(), "*"),
688 &(mlf(p)));
689 else
690 for (p = mlf.head(); p; p = p->next())
691 hash_table.add_item(mlf(p).name(),
692 &(mlf(p)));
693 }
694
695
696 bool hashed_RelationList_extract(EST_Relation* &rel,
697 const EST_hashedRelationList &hash_table,
698 const EST_String &filename, bool base)
699 {
700 EST_Relation *d;
701 EST_String fname = filename;
702 int found;
703
704 if (base)
705 fname=basename(filename, "*");
706
707 d=hash_table.val(fname,found);
708
709 if(found)
710 {
711 rel = d;
712 return true;
713 }
714 cerr << "No match for file " << fname << " found in mlf\n";
715 return false;
716 }
717
718