"Fossies" - the Fresh Open Source Software Archive 
Member "speech_tools/ling_class/EST_Utterance.cc" (4 Sep 2017, 16030 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : May 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* EST_Utterance class source file */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <iostream>
42 #include <fstream>
43 #include "EST_error.h"
44 #include "EST_string_aux.h"
45 #include "ling_class/EST_Utterance.h"
46 #include "EST_UtteranceFile.h"
47 #include "EST_string_aux.h"
48
49 const EST_String DEF_FILE_TYPE = "est_ascii";
50
51 static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s);
52 static EST_Item *map_ling_item(EST_Item *si,
53 EST_TKVL<EST_Item_Content *,
54 EST_Item *> &s);
55 static void copy_relation(EST_Item *to,EST_Item *from,
56 EST_TKVL<EST_Item_Content *,EST_Item *> &slist);
57
58 Declare_KVL_T(EST_Item_Content *, EST_Item *, KVL_ICP_IP)
59
60 #if defined(INSTANTIATE_TEMPLATES)
61
62 #include "../base_class/EST_TList.cc"
63 #include "../base_class/EST_TKVL.cc"
64
65 Instantiate_KVL_T(EST_Item_Content *, EST_Item *, KVL_ICP_IP)
66 #endif
67
68 EST_Utterance::EST_Utterance()
69 {
70 init();
71 }
72
73 void EST_Utterance::init()
74 {
75 highest_id = 0;
76 f.set("max_id", 0);
77 }
78
79 int EST_Utterance::next_id()
80 {
81 int i = f.val("max_id").Int();
82 f.set("max_id", i+1);
83 return i+1;
84 }
85
86 void EST_Utterance::clear()
87 {
88 relations.clear();
89 }
90
91 void EST_Utterance::clear_relations()
92 {
93 EST_Features::Entries p;
94 for (p.begin(relations); p; p++)
95 ::relation(p->v)->clear();
96 }
97
98 EST_Relation *EST_Utterance::create_relation(const EST_String &n)
99 {
100 EST_Relation *r = relation(n,FALSE);
101 if (r) // there is one already, so clear it
102 r->clear();
103 else
104 {
105 r = new EST_Relation(n);
106 r->set_utt(this);
107 relations.set_val(n,est_val(r));
108 }
109
110 return r;
111 }
112
113 static EST_Item *item_id(EST_Item *p, const EST_String &n)
114 {
115 EST_Item *s, *t;
116
117 t = 0;
118 if ((p == 0) || (p->S("id","0") == n))
119 return p;
120
121 for (s = daughter1(p); s; s = inext(s))
122 {
123 t = item_id(s, n);
124 if (t != 0)
125 return t;
126 }
127
128 return 0;
129 }
130
131 EST_Item *EST_Utterance::id(const EST_String &n) const
132 {
133 EST_Item *s, *t;
134 EST_Features::Entries p;
135
136 for (p.begin(relations); p; p++)
137 for (s = ::relation(p->v)->head(); s; s = next_item(s))
138 if ((t = item_id(s, n)) != 0)
139 return t;
140 EST_error("Could not find item matching id %s\n", (const char *)n);
141 return 0;
142 }
143
144 void EST_Utterance::evaluate_all_features()
145 {
146 EST_Features::Entries p;
147
148 for (p.begin(relations); p; p++)
149 ::relation(p->v)->evaluate_item_features();
150 }
151
152 void EST_Utterance::remove_relation(const EST_String &n)
153 {
154 EST_Relation *r = relation(n,FALSE);
155
156 if (r != 0)
157 relations.remove(n);
158 }
159
160 EST_Relation *EST_Utterance::relation(const char *name,int err) const
161 {
162 if (err)
163 return ::relation(relations.f(name));
164 else
165 {
166 EST_Relation *r = 0;
167 return ::relation(relations.f(name,est_val(r)));
168 }
169 }
170
171 bool EST_Utterance::relation_present(const EST_String name) const
172 {
173 if (!name.contains("("))
174 return relations.present(name);
175 EST_StrList s;
176 BracketStringtoStrList(name, s);
177 return relation_present(s);
178 }
179
180 bool EST_Utterance::relation_present(EST_StrList &names) const
181 {
182 for (EST_Litem *p = names.head(); p ; p = p->next())
183 if (!relations.present(names(p)))
184 return false;
185 return true;
186 }
187
188 EST_Utterance &EST_Utterance::operator=(const EST_Utterance &s)
189 {
190 copy(s);
191 return *this;
192 }
193
194 ostream& operator << (ostream &st, const EST_Utterance &u)
195 {
196 u.save(st,"est_ascii");
197 return st;
198 }
199
200 void EST_Utterance::copy(const EST_Utterance &u)
201 {
202 // Make a copy of the utterance
203 EST_TKVL<EST_Item_Content *,EST_Item *> sisilist;
204 EST_Relation *nrel;
205 EST_Item *rnode;
206
207 clear();
208 f = u.f;
209
210 EST_Features::Entries r;
211 for (r.begin(u.relations); r; r++)
212 {
213 EST_Relation *rr = ::relation(r->v);
214 nrel = create_relation(rr->name());
215 nrel->f = rr->f;
216 if (rr->head() != 0)
217 {
218 rnode = nrel->append(map_ling_item(rr->head(),sisilist));
219 copy_relation(rnode,rr->head(),sisilist);
220 }
221 }
222 clear_up_sisilist(sisilist);
223 }
224
225 static void extra_sub_utterance(EST_Utterance &u,EST_Item *i)
226 {
227 sub_utterance(u,i);
228 }
229
230 void EST_Utterance::sub_utterance(EST_Item *i)
231 {
232 extra_sub_utterance(*this,i);
233 }
234
235 static void merge_tree(EST_Relation *urel,
236 EST_Relation *rel,
237 EST_Item *uroot,
238 EST_Item *root,
239 EST_Features &items,
240 EST_String feature)
241 {
242 EST_Item *n=0;
243 merge_features(uroot->features(), root->features());
244 // copy horizontally
245 if (inext(root)!= NULL)
246 {
247 EST_Item *old = item(items.f(inext(root)->S(feature),est_val(n)));
248 EST_Item *new_root = old?uroot->insert_after(old):uroot->insert_after();
249 merge_tree(urel, rel, new_root, inext(root), items, feature);
250 }
251 // vertically
252 if (idown(root)!= NULL)
253 {
254 EST_Item *old = item(items.f(idown(root)->S(feature),est_val(n)));
255 EST_Item *new_root = old?uroot->insert_below(old):uroot->insert_below();
256 merge_tree(urel, rel, new_root, idown(root), items, feature);
257 }
258 }
259
260 int utterance_merge(EST_Utterance &utt,
261 EST_Utterance &extra,
262 EST_String feature)
263 {
264 // Global merge. Uses the feature to determine which items correspond.
265
266 // First build a table of existing contents.
267
268 EST_Features items;
269 EST_Features::Entries ri;
270 for(ri.begin(utt.relations); ri; ri++)
271 {
272 EST_Relation *rel = relation(ri->v);
273 for(EST_Item *i=rel->head(); i != NULL; i=next_item(i))
274 {
275 EST_String id = i->S(feature);
276 items.set_val(id,est_val(i));
277 }
278 }
279
280 EST_Features::Entries eri;
281 for(eri.begin(extra.relations); eri; eri++)
282 {
283 EST_Relation *rel = relation(eri->v);
284 EST_String rel_name = rel->name();
285
286 while (utt.relation_present(rel_name))
287 rel_name += "+";
288
289 EST_Relation *urel = utt.create_relation(rel_name);
290
291 if (rel->head() != NULL)
292 {
293 EST_Item *n = 0;
294 EST_Item *old = item(items.f(rel->head()->S(feature),est_val(n)));
295 EST_Item *new_root = old?urel->append(old):urel->append();
296 merge_tree(urel, rel, new_root, rel->head(), items, feature);
297 }
298 }
299
300 return TRUE;
301 }
302
303 int utterance_merge(EST_Utterance &utt,
304 EST_Utterance &sub_utt,
305 EST_Item *utt_root,
306 EST_Item *sub_root)
307 {
308 // Joins sub_utt to utt at ling_item at, merging the root
309 // of relname in sub_utt with ling_item at. All other relations
310 // in sub_utt get their root's appended (not merged) with the
311 // corresponding relations in utt (and created if necessary).
312 EST_TKVL<EST_Item_Content *,EST_Item *> sisilist;
313 EST_Item *rnode;
314 EST_Relation *nrel;
315
316 if (utt_root->relation_name() != sub_root->relation_name())
317 EST_error("utterance_merge: items not is same relation");
318
319 if ((utt_root == 0) || (sub_root == 0))
320 EST_error("utterance_merge: items are null");
321
322 // merge features but preserve root id
323 EST_String root_id = utt_root->S("id");
324 merge_features(utt_root->features(), sub_root->features());
325 utt_root->set("id", root_id);
326 // in case root item in sub is referenced elsewhere in the structure
327 sisilist.add_item(sub_root->contents(),utt_root);
328 copy_relation(utt_root,sub_root,sisilist);
329
330 EST_Features::Entries r;
331 for (r.begin(sub_utt.relations); r; r++)
332 {
333 EST_Relation *rr = ::relation(r->v);
334 if (rr->name() != utt_root->relation_name())
335 {
336 if (!utt.relation_present(rr->name()))
337 nrel = utt.create_relation(rr->name());
338 else
339 nrel = utt.relation(rr->name());
340 if (rr->head() != 0)
341 {
342 EST_Item *nn = map_ling_item(rr->head(),sisilist);
343 rnode = nrel->append(nn);
344 copy_relation(rnode,rr->head(),sisilist);
345 }
346 }
347 }
348 sisilist.remove_item(sub_root->contents());
349 clear_up_sisilist(sisilist);
350 return TRUE;
351 }
352
353 static void copy_relation(EST_Item *to,EST_Item *from,
354 EST_TKVL<EST_Item_Content *,EST_Item *> &slist)
355 {
356 // Construct next and down nodes of from, into to, mapping
357 // stream_items through slist
358
359 if (inext(from))
360 copy_relation(to->insert_after(map_ling_item(inext(from),slist)),
361 inext(from),
362 slist);
363 if (idown(from))
364 copy_relation(to->insert_below(map_ling_item(idown(from),slist)),
365 idown(from),
366 slist);
367 }
368
369 static EST_Item *map_ling_item(EST_Item *si,
370 EST_TKVL<EST_Item_Content *,EST_Item *> &s)
371 {
372 // If si is already in s return its map otherwise copy
373 // si and add it to the list
374 EST_Item *msi;
375 EST_Item *def = 0;
376
377 msi = s.val_def(si->contents(),def);
378 if (msi == def)
379 { // First time, so copy it and add to map list
380 msi = new EST_Item(*si);
381 msi->f_remove("id");
382 s.add_item(si->contents(),msi);
383 }
384 return msi;
385 }
386
387 static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s)
388 {
389 // The EST_Items in the value of this need to be freed, its
390 // contents however will not be freed as they will be referenced
391 // somewhere in the copied utterance
392
393 for (EST_Litem *r=s.list.head(); r != 0; r=r->next())
394 delete s.list(r).v;
395
396 }
397
398 static EST_Item *mapped_parent(EST_Item *i,const EST_String &relname,
399 EST_TKVL<EST_Item_Content *,EST_Item *> &s)
400 {
401 EST_Item *p;
402
403 if ((p=parent(i,relname)) == 0)
404 return 0;
405 else if (s.present(p->contents()))
406 return map_ling_item(p,s)->as_relation(relname);
407 else
408 return 0;
409 }
410
411 static void sub_utt_copy(EST_Utterance &sub,EST_Item *i,
412 EST_TKVL<EST_Item_Content *,EST_Item *> &s)
413 {
414 if (s.present(i->contents()))
415 return;
416 else
417 {
418 EST_Item *np,*d;
419 EST_Litem *r;
420 EST_Item *ni = map_ling_item(i,s);
421 for (r = i->relations().list.head(); r; r = r->next())
422 {
423 EST_String relname = i->relations().list(r).k;
424 if (!sub.relation_present(relname))
425 sub.create_relation(relname)->append(ni);
426 else if ((np=mapped_parent(i,relname,s)) != 0)
427 np->append_daughter(ni);
428 else
429 sub.relation(relname)->append(ni);
430
431 // Do its daughters
432 for (d = daughter1(i,relname); d ; d=inext(d))
433 sub_utt_copy(sub,d,s);
434 }
435 }
436 }
437
438 void sub_utterance(EST_Utterance &sub,EST_Item *i)
439 {
440 // Extract i and all its relations, and daughters ... to build
441 // a new utterance in sub.
442 EST_TKVL<EST_Item_Content *,EST_Item *> sisilist;
443
444 sub.clear();
445 sub_utt_copy(sub,i,sisilist);
446
447 clear_up_sisilist(sisilist);
448 }
449
450 EST_read_status EST_Utterance::load(const EST_String &filename)
451 {
452 EST_TokenStream ts;
453 EST_read_status v=format_ok;
454
455 if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
456 {
457 cerr << "load_utt: can't open utterance input file "
458 << filename << endl;
459 return misc_read_error;
460 }
461
462 v = load(ts);
463
464 if (v == read_ok)
465 f.set("filename", filename);
466
467 ts.close();
468
469 return v;
470 }
471
472 EST_read_status EST_Utterance::load(EST_TokenStream &ts)
473 {
474 EST_read_status stat=read_error;
475 int pos = ts.tell();
476 int max_id;
477
478 init(); // we're committed to reading something so clear utterance
479
480 for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
481 {
482 EST_UtteranceFileType t = EST_UtteranceFile::map.token(n);
483
484 if (t == uff_none)
485 continue;
486
487 EST_UtteranceFile::Info *info = &(EST_UtteranceFile::map.info(t));
488
489 if (! info->recognise)
490 continue;
491
492 EST_UtteranceFile::Load_TokenStream * l_fun = info->load;
493
494 if (l_fun == NULL)
495 continue;
496
497 ts.seek(pos);
498
499 stat = (*l_fun)(ts, *this, max_id);
500
501 if (stat == read_ok)
502 {
503 // set_file_type(EST_UtteranceFile::map.value(t));
504 break;
505 }
506 }
507
508 highest_id = max_id;
509 return stat;
510 }
511
512 EST_write_status EST_Utterance::save(const EST_String &filename,
513 const EST_String &type) const
514 {
515 EST_write_status v;
516 ostream *outf;
517
518 if (filename == "-")
519 outf = &cout;
520 else
521 outf = new ofstream(filename);
522
523 if (!(*outf))
524 return write_fail;
525
526 v = save(*outf,type);
527
528 if (outf != &cout)
529 delete outf;
530
531 return v;
532 }
533
534 EST_write_status EST_Utterance::save(ostream &outf,
535 const EST_String &type) const
536 {
537 EST_String save_type = (type == "") ? DEF_FILE_TYPE : type;
538
539 EST_UtteranceFileType t = EST_UtteranceFile::map.token(save_type);
540
541 if (t == uff_none)
542 {
543 cerr << "Utterance: unknown filetype in saving " << save_type << endl;
544 return write_fail;
545 }
546
547 EST_UtteranceFile::Save_TokenStream * s_fun = EST_UtteranceFile::map.info(t).save;
548
549 if (s_fun == NULL)
550 {
551 cerr << "Can't save utterances to files type " << save_type << endl;
552 return write_fail;
553 }
554
555 return (*s_fun)(outf, *this);
556 }
557
558 void utt_2_flat_repr( const EST_Utterance &utt,
559 EST_String &flat_repr )
560 {
561 EST_Item *phrase = utt.relation("Phrase")->head();
562 for( ; phrase; phrase=inext(phrase) ){
563 flat_repr += "<";
564
565 EST_Item *word = daughter1(phrase);
566 for( ; word; word=inext(word) ){
567 flat_repr += "{";
568
569 EST_Item *syllable = daughter1(word, "SylStructure");
570 for( ; syllable; syllable=inext(syllable) ){
571 flat_repr += EST_String::cat( "(", syllable->S("stress") );
572
573 EST_Item *phone = daughter1(syllable);
574 for( ; phone; phone=inext(phone) )
575 flat_repr += EST_String::cat( " ", phone->S("name"), " " );
576 flat_repr += ")";
577 }
578 flat_repr += "}";
579 }
580 flat_repr += EST_String::cat( "> _", phrase->S("name"), " " );
581 }
582 }