"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/stats/dynamic_program.cc" (4 Sep 2017, 10691 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "dynamic_program.cc" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*                Centre for Speech Technology Research                  */
    4 /*                     University of Edinburgh, UK                       */
    5 /*                      Copyright (c) 1995,1996                          */
    6 /*                        All Rights Reserved.                           */
    7 /*                                                                       */
    8 /*  Permission is hereby granted, free of charge, to use and distribute  */
    9 /*  this software and its documentation without restriction, including   */
   10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
   12 /*  permit persons to whom this work is furnished to do so, subject to   */
   13 /*  the following conditions:                                            */
   14 /*   1. The code must retain the above copyright notice, this list of    */
   15 /*      conditions and the following disclaimer.                         */
   16 /*   2. Any modifications must be clearly marked as such.                */
   17 /*   3. Original authors' names are not deleted.                         */
   18 /*   4. The authors' names are not used to endorse or promote products   */
   19 /*      derived from this software without specific prior written        */
   20 /*      permission.                                                      */
   21 /*                                                                       */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30 /*  THIS SOFTWARE.                                                       */
   31 /*                                                                       */
   32 /*************************************************************************/
   33 /*                   Author :  Simon King                                */
   34 /*                   Date   :  June 1998                                 */
   35 /*************************************************************************/
   36 
   37 #include <cstdlib>
   38 #include <cstdio>
   39 #include <iostream>
   40 #include <fstream>
   41 #include "EST_math.h"
   42 #include "ling_class/EST_Utterance.h"
   43 
   44 typedef EST_TVector<EST_Item*> EST_Item_ptr_Vector;
   45 static EST_Item *const def_val_item_ptr = NULL;
   46 template <> EST_Item* const *EST_Item_ptr_Vector::def_val = &def_val_item_ptr;
   47 
   48 static EST_Item* error_return_item_ptr = NULL;
   49 template <> EST_Item* *EST_Item_ptr_Vector::error_return = &error_return_item_ptr;
   50 
   51 #if defined(INSTANTIATE_TEMPLATES)
   52 
   53 #include "../base_class/EST_TVector.cc"
   54 
   55 template class EST_TVector<EST_Item*>;
   56 
   57 #endif
   58 
   59 typedef
   60 float (*local_cost_function)(const EST_Item *item1,
   61                  const EST_Item *item2);
   62 
   63 typedef
   64 bool (*local_pruning_function)(const int i,
   65                    const int j,
   66                    const int max_i, 
   67                    const int max_j);
   68 
   69 bool dp_sub(int i, int j,
   70         const EST_Item_ptr_Vector &vr1,
   71         const EST_Item_ptr_Vector &vr2,
   72         EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j, 
   73         local_cost_function lcf,
   74         local_pruning_function lpf,
   75         EST_Item *null_sym,
   76         EST_FMatrix &cost);
   77 
   78 void trace_back_and_link(int i, int j,
   79              EST_Item *p1, EST_Item *p2,
   80              const EST_IMatrix &DP_path_i, 
   81              const EST_IMatrix &DP_path_j,
   82              EST_Item *null_sym);
   83 
   84 inline bool null_lpf(const int,const int,const int,const int)
   85 {
   86     return FALSE;
   87 }
   88 
   89 bool dp_match(const EST_Relation &lexical,
   90           const EST_Relation &surface,
   91           EST_Relation &match,
   92           local_cost_function lcf,
   93           local_pruning_function lpf,
   94           EST_Item *null_sym);
   95 
   96 
   97 bool dp_match(const EST_Relation &lexical,
   98           const EST_Relation &surface,
   99           EST_Relation &match,
  100           local_cost_function lcf,
  101           EST_Item *null_sym)
  102 {
  103     // dp without pruning
  104 
  105     return dp_match(lexical,surface,match,lcf,null_lpf,null_sym);
  106 
  107 }
  108 
  109 static float fixed_ins;
  110 static float fixed_del;
  111 static float fixed_sub;
  112 
  113 float fixed_local_cost(const EST_Item *s1, const EST_Item *s2)
  114 {
  115     EST_String null_sym = "nil";
  116 
  117     // otherwise cost is either insertion cost, or cost_matrix value
  118     if (s1->name() == s2->name())
  119     return 0;
  120     else
  121     {
  122     if (s1->name() == null_sym)
  123         return fixed_ins;
  124     else if (s2->name() == null_sym)
  125         return fixed_del;
  126     else 
  127         return fixed_sub;
  128     }
  129 }
  130 
  131 
  132 bool dp_match(const EST_Relation &lexical,
  133           const EST_Relation &surface,
  134           EST_Relation &match,
  135           float ins, float del, float sub)
  136 {
  137     fixed_ins = ins;
  138     fixed_del = del;
  139     fixed_sub = sub;
  140     EST_Item null_sym;
  141 
  142     return dp_match(lexical, surface, match, fixed_local_cost, 
  143             null_lpf, &null_sym);
  144 }
  145 
  146 
  147 bool dp_match(const EST_Relation &lexical,
  148           const EST_Relation &surface,
  149           EST_Relation &match,
  150           local_cost_function lcf,
  151           local_pruning_function lpf,
  152           EST_Item *null_sym)
  153 {
  154     
  155 
  156     // aligns lexical and surface forms using dynamic programming
  157     // i.e. the lexical form is transformed into the surface form
  158     //      by substitutions, insertions and deletions
  159 
  160     // makes links between associated (matching or substituted) items
  161     // insertions and deletions are 'left dangling'
  162     // links are stored in a new relation called "Match"
  163 
  164     // assumes that local cost computation is cheap (no caching)
  165 
  166     EST_IMatrix DP_path_i,DP_path_j;
  167     EST_Item_ptr_Vector vr1,vr2;
  168     EST_Item *p;
  169     int l1,l2,i,j;
  170     
  171     l1 = lexical.length() + 1;
  172     l2 = surface.length() + 1;
  173 
  174     vr1.resize(l1);
  175     vr2.resize(l2);
  176 
  177     // prepend null_syms
  178     vr1[0] = null_sym;
  179     vr2[0] = null_sym;
  180 
  181     for (p=lexical.head(),i=1; p != 0; p = inext(p),i++)
  182     vr1[i] = p;
  183     for (p=surface.head(),i=1; p != 0; p = inext(p),i++)
  184     vr2[i] = p;
  185     
  186     DP_path_i.resize(l1,l2);
  187     DP_path_j.resize(l1,l2);
  188 
  189 /*
  190     cerr << "Pruning" << endl;
  191     for(i=0;i<l1;i++)
  192     {
  193     for(j=0;j<l2;j++)
  194         if(lpf(i,j,l1,l2))
  195         cerr << "- ";
  196         else
  197         cerr << "+ ";
  198     cerr << endl;
  199     }
  200     cerr << endl;
  201 */
  202 
  203     // end conditions : must start at (0,0) and finish at (l1-1,l2-1)
  204     // i.e. extreme corners of grid
  205     EST_FMatrix cost;
  206     cost.resize(vr1.length(),vr2.length());
  207     for(i=0;i<l1;i++)
  208     for(j=0;j<l2;j++)
  209         cost.a_no_check(i,j) = -1; // means not computed yet
  210 
  211     if(!dp_sub(l1-1,l2-1,
  212            vr1,vr2,
  213            DP_path_i,DP_path_j,
  214            lcf,lpf,null_sym,cost))
  215     {
  216     cerr << "No path found (over pruning ?)" << endl;
  217     return FALSE;
  218     }
  219     // make somewhere to record the relations
  220     //utt.create_relation("Match");
  221     for (p = lexical.head(); p; p = inext(p))
  222     match.append(p);
  223 
  224     /*
  225     for(i=0;i<l1;i++)
  226     {
  227     for(j=0;j<l2;j++)
  228         cerr << i << "," << j << "=[" << DP_path_i(i,j) << "," << DP_path_j(i,j) << "] ";
  229     cerr << endl;
  230     }
  231     cerr << endl;
  232 */
  233 
  234     trace_back_and_link(l1-1,l2-1,
  235             match.rlast(),
  236             surface.rlast(),
  237             DP_path_i,DP_path_j,null_sym);
  238 
  239     return TRUE;
  240 }
  241 
  242 
  243 bool dp_sub(int i, int j,
  244         const EST_Item_ptr_Vector &vr1, 
  245         const EST_Item_ptr_Vector &vr2,
  246         EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j, 
  247         local_cost_function lcf, 
  248         local_pruning_function lpf, 
  249         EST_Item *null_sym,
  250         EST_FMatrix &cost)
  251 {
  252 
  253     // the goal is to compute cost(i,j)
  254     
  255     // already done ?
  256     if(cost(i,j) >= 0)
  257     return TRUE;
  258 
  259     //cerr << "sub " << i << " " << j << endl;
  260 
  261     int best_i=-1,best_j=-1;
  262     float sub,ins,del;
  263     float best_c=MAXFLOAT;
  264 
  265     // prune ?
  266     if(lpf(i,j,vr1.length()-1,vr2.length()-1))
  267     return FALSE;
  268 
  269 
  270     // consider all paths into this point 
  271     // and select the best one (lowest cost)
  272 
  273     if(i==0)
  274     {
  275     if(j==0)
  276     {
  277 
  278         best_i = i;
  279         best_j = j;
  280         best_c = lcf(null_sym,null_sym);
  281     }
  282     else
  283     {
  284 
  285         // insert j'th item from vr2
  286         if(dp_sub(i,j-1,vr1,vr2,
  287               DP_path_i,DP_path_j,
  288               lcf,lpf, null_sym,cost))
  289         {
  290         best_i = i;
  291         best_j = j-1;
  292         best_c = lcf(null_sym,vr2(j)) + cost.a(i,j-1);
  293         }
  294         else
  295         return FALSE;
  296     }
  297     }
  298 
  299     else if(j==0)
  300     {
  301     
  302     // delete i'th item from vr1
  303     if(dp_sub(i-1,j,vr1,vr2,
  304           DP_path_i,DP_path_j,
  305           lcf,lpf, null_sym,cost))
  306     {
  307         best_i = i-1;
  308         best_j = j;
  309         best_c = lcf(vr1(i),null_sym) + cost.a(i-1,j);
  310     }
  311 
  312     }
  313 
  314     // this is the simplest local constraint (i.e. no constraints !)
  315     // which allows unlimited consecutive insertions or deletions
  316 
  317     else
  318     {
  319 
  320     if(dp_sub(i-1,j-1,vr1,vr2,
  321           DP_path_i,DP_path_j,
  322           lcf,lpf, null_sym,cost))
  323     {
  324         sub = 2 * lcf(vr1(i),vr2(j)) + cost(i-1,j-1);
  325         if(sub < best_c)
  326         {
  327         best_i=i-1;
  328         best_j=j-1;
  329         best_c = sub;
  330         }
  331     }
  332     
  333     if(dp_sub(i,j-1,vr1,vr2,
  334           DP_path_i,DP_path_j,
  335           lcf,lpf, null_sym,cost))
  336     {
  337         ins=lcf(null_sym,vr2(j)) + cost(i,j-1);
  338         if(ins < best_c)
  339         {
  340         best_i=i;
  341         best_j=j-1;
  342         best_c = ins;
  343         }
  344     }
  345     
  346     if(dp_sub(i-1,j,vr1,vr2,
  347           DP_path_i,DP_path_j,
  348           lcf,lpf, null_sym,cost))
  349     {
  350         del=lcf(vr1(i),null_sym) + cost(i-1,j);
  351         if(del < best_c)
  352         {
  353         best_i=i-1;
  354         best_j=j;
  355         best_c = del;
  356         }
  357     }
  358     }
  359 
  360     cost.a(i,j) = best_c;
  361     DP_path_i.a_no_check(i,j) = best_i;
  362     DP_path_j.a_no_check(i,j) = best_j;
  363 
  364 
  365     //cerr << "best " << i << "," << j << " = " << best_c << endl;
  366 
  367     if(best_c == MAXFLOAT)
  368     // didn't find a better path
  369     return FALSE;
  370     else
  371     return TRUE;
  372 
  373 }
  374 
  375 
  376 void trace_back_and_link(int i, int j,
  377              EST_Item *p1, EST_Item *p2,
  378              const EST_IMatrix &DP_path_i, 
  379              const EST_IMatrix &DP_path_j,
  380              EST_Item *null_sym)
  381 {
  382 
  383     //cerr << "trace " << i << " " << j << endl;
  384 
  385 
  386     //int i,j;
  387     //i=utt.relation("Lexical")->index(p1);
  388     //j=utt.relation("Surface")->index(p2);
  389 
  390     if((p1==0)&&(p2==0))
  391     // reached start
  392     return;
  393 
  394     if(DP_path_i(i,j) == i-1)
  395     {
  396     if(DP_path_j(i,j) == j-1)
  397     {
  398         // match, or substitution
  399         //cerr << "sub " << p1->name() << " with " << p2->name() << endl;
  400         p1->append_daughter(p2);
  401         p1=iprev(p1);
  402         p2=iprev(p2);
  403     }
  404     else
  405         // deletion
  406         p1=iprev(p1);
  407     }    
  408     else
  409     {
  410     // insertion
  411     // p1->append_daughter(p2); // decorative
  412     p2=iprev(p2);
  413     }
  414 
  415     trace_back_and_link(DP_path_i(i,j), DP_path_j(i,j),
  416             p1,p2,
  417             DP_path_i,DP_path_j,
  418             null_sym);
  419 }
  420