"Fossies" - the Fresh Open Source Software Archive 
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Simon King */
34 /* Date : June 1998 */
35 /*************************************************************************/
36
37 #include <cstdlib>
38 #include <cstdio>
39 #include <iostream>
40 #include <fstream>
41 #include "EST_math.h"
42 #include "ling_class/EST_Utterance.h"
43
44 typedef EST_TVector<EST_Item*> EST_Item_ptr_Vector;
45 static EST_Item *const def_val_item_ptr = NULL;
46 template <> EST_Item* const *EST_Item_ptr_Vector::def_val = &def_val_item_ptr;
47
48 static EST_Item* error_return_item_ptr = NULL;
49 template <> EST_Item* *EST_Item_ptr_Vector::error_return = &error_return_item_ptr;
50
51 #if defined(INSTANTIATE_TEMPLATES)
52
53 #include "../base_class/EST_TVector.cc"
54
55 template class EST_TVector<EST_Item*>;
56
57 #endif
58
59 typedef
60 float (*local_cost_function)(const EST_Item *item1,
61 const EST_Item *item2);
62
63 typedef
64 bool (*local_pruning_function)(const int i,
65 const int j,
66 const int max_i,
67 const int max_j);
68
69 bool dp_sub(int i, int j,
70 const EST_Item_ptr_Vector &vr1,
71 const EST_Item_ptr_Vector &vr2,
72 EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j,
73 local_cost_function lcf,
74 local_pruning_function lpf,
75 EST_Item *null_sym,
76 EST_FMatrix &cost);
77
78 void trace_back_and_link(int i, int j,
79 EST_Item *p1, EST_Item *p2,
80 const EST_IMatrix &DP_path_i,
81 const EST_IMatrix &DP_path_j,
82 EST_Item *null_sym);
83
84 inline bool null_lpf(const int,const int,const int,const int)
85 {
86 return FALSE;
87 }
88
89 bool dp_match(const EST_Relation &lexical,
90 const EST_Relation &surface,
91 EST_Relation &match,
92 local_cost_function lcf,
93 local_pruning_function lpf,
94 EST_Item *null_sym);
95
96
97 bool dp_match(const EST_Relation &lexical,
98 const EST_Relation &surface,
99 EST_Relation &match,
100 local_cost_function lcf,
101 EST_Item *null_sym)
102 {
103 // dp without pruning
104
105 return dp_match(lexical,surface,match,lcf,null_lpf,null_sym);
106
107 }
108
109 static float fixed_ins;
110 static float fixed_del;
111 static float fixed_sub;
112
113 float fixed_local_cost(const EST_Item *s1, const EST_Item *s2)
114 {
115 EST_String null_sym = "nil";
116
117 // otherwise cost is either insertion cost, or cost_matrix value
118 if (s1->name() == s2->name())
119 return 0;
120 else
121 {
122 if (s1->name() == null_sym)
123 return fixed_ins;
124 else if (s2->name() == null_sym)
125 return fixed_del;
126 else
127 return fixed_sub;
128 }
129 }
130
131
132 bool dp_match(const EST_Relation &lexical,
133 const EST_Relation &surface,
134 EST_Relation &match,
135 float ins, float del, float sub)
136 {
137 fixed_ins = ins;
138 fixed_del = del;
139 fixed_sub = sub;
140 EST_Item null_sym;
141
142 return dp_match(lexical, surface, match, fixed_local_cost,
143 null_lpf, &null_sym);
144 }
145
146
147 bool dp_match(const EST_Relation &lexical,
148 const EST_Relation &surface,
149 EST_Relation &match,
150 local_cost_function lcf,
151 local_pruning_function lpf,
152 EST_Item *null_sym)
153 {
154
155
156 // aligns lexical and surface forms using dynamic programming
157 // i.e. the lexical form is transformed into the surface form
158 // by substitutions, insertions and deletions
159
160 // makes links between associated (matching or substituted) items
161 // insertions and deletions are 'left dangling'
162 // links are stored in a new relation called "Match"
163
164 // assumes that local cost computation is cheap (no caching)
165
166 EST_IMatrix DP_path_i,DP_path_j;
167 EST_Item_ptr_Vector vr1,vr2;
168 EST_Item *p;
169 int l1,l2,i,j;
170
171 l1 = lexical.length() + 1;
172 l2 = surface.length() + 1;
173
174 vr1.resize(l1);
175 vr2.resize(l2);
176
177 // prepend null_syms
178 vr1[0] = null_sym;
179 vr2[0] = null_sym;
180
181 for (p=lexical.head(),i=1; p != 0; p = inext(p),i++)
182 vr1[i] = p;
183 for (p=surface.head(),i=1; p != 0; p = inext(p),i++)
184 vr2[i] = p;
185
186 DP_path_i.resize(l1,l2);
187 DP_path_j.resize(l1,l2);
188
189 /*
190 cerr << "Pruning" << endl;
191 for(i=0;i<l1;i++)
192 {
193 for(j=0;j<l2;j++)
194 if(lpf(i,j,l1,l2))
195 cerr << "- ";
196 else
197 cerr << "+ ";
198 cerr << endl;
199 }
200 cerr << endl;
201 */
202
203 // end conditions : must start at (0,0) and finish at (l1-1,l2-1)
204 // i.e. extreme corners of grid
205 EST_FMatrix cost;
206 cost.resize(vr1.length(),vr2.length());
207 for(i=0;i<l1;i++)
208 for(j=0;j<l2;j++)
209 cost.a_no_check(i,j) = -1; // means not computed yet
210
211 if(!dp_sub(l1-1,l2-1,
212 vr1,vr2,
213 DP_path_i,DP_path_j,
214 lcf,lpf,null_sym,cost))
215 {
216 cerr << "No path found (over pruning ?)" << endl;
217 return FALSE;
218 }
219 // make somewhere to record the relations
220 //utt.create_relation("Match");
221 for (p = lexical.head(); p; p = inext(p))
222 match.append(p);
223
224 /*
225 for(i=0;i<l1;i++)
226 {
227 for(j=0;j<l2;j++)
228 cerr << i << "," << j << "=[" << DP_path_i(i,j) << "," << DP_path_j(i,j) << "] ";
229 cerr << endl;
230 }
231 cerr << endl;
232 */
233
234 trace_back_and_link(l1-1,l2-1,
235 match.rlast(),
236 surface.rlast(),
237 DP_path_i,DP_path_j,null_sym);
238
239 return TRUE;
240 }
241
242
243 bool dp_sub(int i, int j,
244 const EST_Item_ptr_Vector &vr1,
245 const EST_Item_ptr_Vector &vr2,
246 EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j,
247 local_cost_function lcf,
248 local_pruning_function lpf,
249 EST_Item *null_sym,
250 EST_FMatrix &cost)
251 {
252
253 // the goal is to compute cost(i,j)
254
255 // already done ?
256 if(cost(i,j) >= 0)
257 return TRUE;
258
259 //cerr << "sub " << i << " " << j << endl;
260
261 int best_i=-1,best_j=-1;
262 float sub,ins,del;
263 float best_c=MAXFLOAT;
264
265 // prune ?
266 if(lpf(i,j,vr1.length()-1,vr2.length()-1))
267 return FALSE;
268
269
270 // consider all paths into this point
271 // and select the best one (lowest cost)
272
273 if(i==0)
274 {
275 if(j==0)
276 {
277
278 best_i = i;
279 best_j = j;
280 best_c = lcf(null_sym,null_sym);
281 }
282 else
283 {
284
285 // insert j'th item from vr2
286 if(dp_sub(i,j-1,vr1,vr2,
287 DP_path_i,DP_path_j,
288 lcf,lpf, null_sym,cost))
289 {
290 best_i = i;
291 best_j = j-1;
292 best_c = lcf(null_sym,vr2(j)) + cost.a(i,j-1);
293 }
294 else
295 return FALSE;
296 }
297 }
298
299 else if(j==0)
300 {
301
302 // delete i'th item from vr1
303 if(dp_sub(i-1,j,vr1,vr2,
304 DP_path_i,DP_path_j,
305 lcf,lpf, null_sym,cost))
306 {
307 best_i = i-1;
308 best_j = j;
309 best_c = lcf(vr1(i),null_sym) + cost.a(i-1,j);
310 }
311
312 }
313
314 // this is the simplest local constraint (i.e. no constraints !)
315 // which allows unlimited consecutive insertions or deletions
316
317 else
318 {
319
320 if(dp_sub(i-1,j-1,vr1,vr2,
321 DP_path_i,DP_path_j,
322 lcf,lpf, null_sym,cost))
323 {
324 sub = 2 * lcf(vr1(i),vr2(j)) + cost(i-1,j-1);
325 if(sub < best_c)
326 {
327 best_i=i-1;
328 best_j=j-1;
329 best_c = sub;
330 }
331 }
332
333 if(dp_sub(i,j-1,vr1,vr2,
334 DP_path_i,DP_path_j,
335 lcf,lpf, null_sym,cost))
336 {
337 ins=lcf(null_sym,vr2(j)) + cost(i,j-1);
338 if(ins < best_c)
339 {
340 best_i=i;
341 best_j=j-1;
342 best_c = ins;
343 }
344 }
345
346 if(dp_sub(i-1,j,vr1,vr2,
347 DP_path_i,DP_path_j,
348 lcf,lpf, null_sym,cost))
349 {
350 del=lcf(vr1(i),null_sym) + cost(i-1,j);
351 if(del < best_c)
352 {
353 best_i=i-1;
354 best_j=j;
355 best_c = del;
356 }
357 }
358 }
359
360 cost.a(i,j) = best_c;
361 DP_path_i.a_no_check(i,j) = best_i;
362 DP_path_j.a_no_check(i,j) = best_j;
363
364
365 //cerr << "best " << i << "," << j << " = " << best_c << endl;
366
367 if(best_c == MAXFLOAT)
368 // didn't find a better path
369 return FALSE;
370 else
371 return TRUE;
372
373 }
374
375
376 void trace_back_and_link(int i, int j,
377 EST_Item *p1, EST_Item *p2,
378 const EST_IMatrix &DP_path_i,
379 const EST_IMatrix &DP_path_j,
380 EST_Item *null_sym)
381 {
382
383 //cerr << "trace " << i << " " << j << endl;
384
385
386 //int i,j;
387 //i=utt.relation("Lexical")->index(p1);
388 //j=utt.relation("Surface")->index(p2);
389
390 if((p1==0)&&(p2==0))
391 // reached start
392 return;
393
394 if(DP_path_i(i,j) == i-1)
395 {
396 if(DP_path_j(i,j) == j-1)
397 {
398 // match, or substitution
399 //cerr << "sub " << p1->name() << " with " << p2->name() << endl;
400 p1->append_daughter(p2);
401 p1=iprev(p1);
402 p2=iprev(p2);
403 }
404 else
405 // deletion
406 p1=iprev(p1);
407 }
408 else
409 {
410 // insertion
411 // p1->append_daughter(p2); // decorative
412 p2=iprev(p2);
413 }
414
415 trace_back_and_link(DP_path_i(i,j), DP_path_j(i,j),
416 p1,p2,
417 DP_path_i,DP_path_j,
418 null_sym);
419 }
420