"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/main/align_main.cc" (4 Sep 2017, 12451 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "align_main.cc" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*                Centre for Speech Technology Research                  */
    4 /*                     University of Edinburgh, UK                       */
    5 /*                      Copyright (c) 1996,1997                          */
    6 /*                        All Rights Reserved.                           */
    7 /*                                                                       */
    8 /*  Permission is hereby granted, free of charge, to use and distribute  */
    9 /*  this software and its documentation without restriction, including   */
   10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
   12 /*  permit persons to whom this work is furnished to do so, subject to   */
   13 /*  the following conditions:                                            */
   14 /*   1. The code must retain the above copyright notice, this list of    */
   15 /*      conditions and the following disclaimer.                         */
   16 /*   2. Any modifications must be clearly marked as such.                */
   17 /*   3. Original authors' names are not deleted.                         */
   18 /*   4. The authors' names are not used to endorse or promote products   */
   19 /*      derived from this software without specific prior written        */
   20 /*      permission.                                                      */
   21 /*                                                                       */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30 /*  THIS SOFTWARE.                                                       */
   31 /*                                                                       */
   32 /*************************************************************************/
   33 /*                     Author :  Alan W Black                            */
   34 /*                     Date   :  September 1999                          */
   35 /*-----------------------------------------------------------------------*/
   36 /*  Simple alignment scoring program to give number of insertions,       */
   37 /*  deletions, errors between a string of symbols and a reference string */
   38 /*  of symbols                                                           */
   39 /*                                                                       */
   40 /*=======================================================================*/
   41 #include <cstdlib>
   42 #include <cstdio>
   43 #include <iostream>
   44 #include <fstream>
   45 #include <cstring>
   46 #include "EST.h"
   47 #include "EST_WFST.h"
   48 
   49 static int align_main(int argc, char **argv);
   50 static void nisttool_align(EST_Option &al);
   51 static void string_align(EST_Option &al);
   52 static void align_score(EST_Utterance &u, const EST_String &refrel,
   53             const EST_String &hyporel, 
   54             const EST_String &alignrel,
   55             int &total,int &ins,int &del,int &sub,int &correct);
   56 static int name_distance(EST_Item *r,EST_Item *h);
   57 void align(EST_Utterance &utt, 
   58        const EST_String &refrel,
   59        const EST_String &hyporel,
   60        const EST_String &alignrel);
   61 static void load_sentence(EST_Utterance &u, const EST_String &relname,
   62               EST_TokenStream &ts);
   63 static void load_sentence(EST_Utterance &u, const EST_String &relname,
   64               EST_String &relval);
   65 
   66 /** @name <command>align</command> <emphasis>align stream with reference stream</emphasis>
   67     @id align-manual
   68   * @toc
   69  */
   70 
   71 //@{
   72 
   73 
   74 /**@name Synopsis
   75   */
   76 //@{
   77 
   78 //@synopsis
   79 
   80 /**
   81 
   82  */
   83 
   84 //@}
   85 
   86 /**@name OPTIONS
   87   */
   88 //@{
   89 
   90 //@options
   91 
   92 //@}
   93 int main(int argc, char **argv)
   94 {
   95 
   96     align_main(argc,argv);
   97 
   98     exit(0);
   99     return 0;
  100 }
  101 
  102 static int align_main(int argc, char **argv)
  103 {
  104     // Top level function generates a WFST from rules
  105     EST_Option al;
  106     EST_StrList files;
  107     EST_String outfile;
  108     EST_String format;
  109 
  110     parse_command_line
  111     (argc, argv,
  112      EST_String("[options] ...\n")+
  113      "Summary: align an hypothesis with a reference string\n"+
  114      "-rfile <ifile>    Reference file\n"+
  115      "-hfile <ifile>    Hypothesis file\n"+
  116      "-rstring <string> Reference string\n"+
  117      "-hstring <string> Hypothesis string\n"+
  118      "-format <string>\n"+
  119          "               Supported formats: strings, nisttool\n",
  120                files, al);
  121     
  122     if (al.present("-o"))
  123     outfile = al.val("-o");
  124     else
  125     outfile = "-";
  126 
  127     if (al.present("-format"))
  128     format = al.val("-format");
  129     else
  130     format = "strings";
  131 
  132     if (format == "strings")
  133         string_align(al);
  134     else if (format == "nisttool")
  135     nisttool_align(al);
  136     else
  137         cout << "Unknown or unhandled format: " << format << endl;  
  138 
  139     return 0;
  140 }
  141 
  142 bool dp_match(const EST_Relation &lexical,
  143           const EST_Relation &surface,
  144           EST_Relation &match,
  145           float ins, float del, float sub);
  146 
  147 static void string_align(EST_Option &al)
  148 {
  149     EST_String refStr = al.val("-rstring");
  150     EST_String hypStr = al.val("-hstring");
  151     EST_Utterance u;
  152     int total,ins,del,sub,correct;
  153 
  154     load_sentence(u,"ref",refStr);
  155     load_sentence(u,"hypo",hypStr);
  156     align(u,"ref","hypo","align");
  157     align_score(u,"ref","hypo","align",total,ins,del,sub,correct);
  158     fprintf(stdout,"words %d\n",total);
  159     fprintf(stdout,"insertions %d\n",ins);
  160     fprintf(stdout,"deletions %d\n",del);
  161     fprintf(stdout,"substitutions %d\n",sub);
  162     fprintf(stdout,"correct %d\n",correct);
  163     fprintf(stdout,"WER %f\n",(100.0 * (float)(ins+del+sub))/total);
  164 }
  165 
  166 static void nisttool_align(EST_Option &al)
  167 {
  168     // Using the format used by the NIST tools for alignment
  169     // Sentence per line with parenthesized id at end
  170     EST_String reffile = al.val("-rfile");
  171     EST_String hypofile = al.val("-hfile");
  172     EST_TokenStream rts,hts;
  173     EST_Item *r, *h;
  174     static EST_Regex id("^(.*)$");
  175     int sents=0;
  176     int total,ins,del,sub,correct;
  177     int s_total,s_ins,s_del,s_sub,s_correct;
  178 
  179     rts.open(reffile);
  180     hts.open(hypofile);
  181     s_total=s_ins=s_del=s_sub=s_correct=0;
  182 
  183     while (!rts.eof())
  184     {
  185     EST_Utterance u;
  186     
  187     load_sentence(u,"ref",rts);
  188     load_sentence(u,"hypo",hts);
  189     r = u.relation("ref")->rlast();
  190     h = u.relation("hypo")->rlast();
  191     if ((!r->name().matches(id)) ||
  192         (r->name() != h->name()))
  193     {
  194         cerr << "Align: failed to match sentence " <<
  195         sents << " at id " << r->name() << endl;
  196     }
  197     else
  198     {
  199         // Ids aren't counted as words
  200         r->unref_all();
  201         h->unref_all();
  202         align(u,"ref","hypo","align");
  203         // This doesn't give exactly the same as the NIST tools
  204         // even though it should (actually I think its better)
  205 //      dp_match(*u.relation("ref"),
  206 //           *u.relation("hypo"),
  207 //           *u.create_relation("align"),
  208 //           3,3,4);
  209         align_score(u,"ref","hypo","align",
  210             total,ins,del,sub,correct);
  211         s_total += total;
  212         s_ins += ins;
  213         s_del += del;
  214         s_sub += sub;
  215         s_correct += correct;
  216     }
  217         sents++;
  218     }
  219 
  220     rts.close();
  221     hts.close();
  222     fprintf(stdout,"sentences %d\n",sents);
  223     fprintf(stdout,"words %d\n",s_total);
  224     fprintf(stdout,"insertions %d\n",s_ins);
  225     fprintf(stdout,"deletions %d\n",s_del);
  226     fprintf(stdout,"substitutions %d\n",s_sub);
  227     fprintf(stdout,"correct %d\n",s_correct);
  228     fprintf(stdout,"WER %f\n",(100.0 * (float)(s_ins+s_del+s_sub))/s_total);
  229 }
  230 
  231 static void load_sentence(EST_Utterance &u,
  232               const EST_String &relname,
  233               EST_TokenStream &ts)
  234 {
  235     EST_Relation *r = u.create_relation(relname);
  236 
  237     do
  238     {
  239     EST_Item *i = r->append();
  240     i->set_name(ts.get());
  241     }
  242     while ((!ts.eoln()) && (!ts.eof()));
  243 }
  244 
  245 static void load_sentence(EST_Utterance &u,
  246               const EST_String &relname,
  247               EST_String &relval)
  248 {
  249     EST_Relation *r = u.create_relation(relname);
  250     EST_StrList strlist;
  251     StringtoStrList(relval, strlist, " ");
  252     EST_StrList::Entries iter;
  253 
  254     for (iter.begin(strlist); iter; ++iter)
  255     {
  256         EST_Item *i = r->append();
  257     i->set_name(*iter);
  258     }
  259 }
  260 
  261 static void align_score(EST_Utterance &u, const EST_String &refrel,
  262             const EST_String &hyporel, 
  263             const EST_String &alignrel,
  264             int &total,int &ins,int &del,int &sub,int &correct)
  265 {
  266     // Score alignment
  267     EST_Item *ri,*hi;
  268     total=ins=del=correct=sub=0;
  269 
  270     for (ri=u.relation(refrel)->first(),
  271      hi=u.relation(hyporel)->first();
  272      ri;
  273      ri=inext(ri),hi=inext(hi))
  274     {
  275     for ( ; (as(hi,alignrel) == 0) && hi ; hi=inext(hi))
  276     {
  277         fprintf(stdout,"inserted: %s\n",(const char *)hi->name());
  278         ins++;
  279     }
  280     for ( ; (daughter1(ri,alignrel) == 0) && ri; ri=inext(ri))
  281     {
  282         fprintf(stdout,"deleted: %s\n",(const char *)ri->name());
  283         del++;
  284     }
  285     if (!ri)
  286         break;
  287     if (name_distance(ri,daughter1(ri,alignrel)) == 0)
  288     {
  289         fprintf(stdout,"correct: %s\n",(const char *)ri->name());
  290         correct++;
  291     }
  292     else
  293     {
  294         fprintf(stdout,"substituted: %s\n",(const char *)ri->name());
  295         sub++;
  296     }
  297     }
  298     // For trailing hypothesized (or ref is nil)
  299     for ( ;  hi ; hi=inext(hi))
  300     {
  301     fprintf(stdout,"inserted: %s\n",(const char *)hi->name());
  302     ins++;
  303     }
  304 
  305     total = u.relation(refrel)->length();
  306 
  307 
  308 //    fprintf(stdout,"total %d ins %d del %d subs %d correct %d\n",
  309 //      total, ins, del, sub, correct);
  310 }
  311 
  312 static int name_distance(EST_Item *r,EST_Item *h)
  313 {
  314     EST_String rname = r->name();
  315     EST_String hname = h->name();
  316     if ((rname == hname) ||
  317     (downcase(rname) == downcase(hname)))
  318     return 0;
  319     else
  320     return 1;
  321 }
  322 
  323 void align(EST_Utterance &utt,
  324        const EST_String &refrel,
  325        const EST_String &hyporel,
  326        const EST_String &alignrel)
  327 {
  328     // Align refrel to hyporel by alignrel
  329     int r_size = utt.relation(refrel)->length();
  330     int h_size = utt.relation(hyporel)->length();
  331     EST_Item *ri = utt.relation(refrel)->first();
  332     EST_Item *hi = utt.relation(hyporel)->first();
  333     int i,j;
  334     int insdel_cost = 3;
  335     int subs_cost = 4;
  336     float to_insert,to_del,to_subs;
  337     float cost;
  338 
  339     EST_Relation *ar = utt.create_relation(alignrel);
  340 
  341     EST_FMatrix dpt(r_size+1,h_size+1);
  342     EST_IMatrix dpp(r_size+1,h_size+1);
  343 
  344     // Initialise first row and column
  345     dpt(0,0) = subs_cost * name_distance(ri,hi);
  346     dpp(0,0) = 0;
  347     for (i=1; i<r_size+1; i++)
  348     {
  349     dpt(i,0) = insdel_cost + dpt(i-1,0);
  350     dpp(i,0) = -1;  // deletion
  351     }
  352     for (j=1; j < h_size+1; j++)
  353     {
  354     dpt(0,j) = insdel_cost + dpt(0,j-1);
  355     dpp(0,j) = 1;   // insertion
  356     }
  357 
  358     ri = utt.relation(refrel)->first();
  359     for (i=1; ri; ri=inext(ri),i++)
  360     {
  361     ar->append(ri);  // for use later
  362     hi = utt.relation(hyporel)->first();
  363     for (j=1; hi; hi=inext(hi),j++)
  364     {
  365         cost = name_distance(ri,hi);
  366         to_insert = insdel_cost + dpt(i,j-1);
  367         to_del = insdel_cost + dpt(i-1,j);
  368         to_subs = (cost * subs_cost) + dpt(i-1,j-1);
  369         if (to_insert < to_del)
  370         {
  371         if (to_insert < to_subs)
  372         {
  373             dpt(i,j) = to_insert;
  374             dpp(i,j) = 1;
  375         }
  376             else
  377         {
  378             dpt(i,j) = to_subs;
  379             dpp(i,j) = 0;
  380         }
  381         }
  382         else
  383         {
  384         if (to_del < to_subs)
  385         {
  386             dpt(i,j) = to_del;
  387             dpp(i,j) = -1;
  388         }
  389             else
  390         {
  391             dpt(i,j) = to_subs;
  392             dpp(i,j) = 0;
  393         }
  394         }
  395     }
  396     }
  397 
  398 //      for (i=1,ri=utt.relation(refrel)->first(); i < r_size+1; i++,ri=inext(ri))
  399 //      {
  400 //      fprintf(stdout,"%10s  ",(const char *)ri->name());
  401 //      for (j=1,hi=utt.relation(hyporel)->first(); j<h_size+1; j++,hi=inext(hi))
  402 //          fprintf(stdout,"%3d/%2d ",(int)dpt(i,j),dpp(i,j));
  403 //      fprintf(stdout,"\n");
  404 //      }
  405 
  406     for (i=r_size,j=h_size,
  407          ri=utt.relation(refrel)->rlast(),
  408          hi=utt.relation(hyporel)->rlast();
  409      ri; i--,ri=iprev(ri))
  410     {
  411     while (dpp(i,j) == 1)
  412     {
  413         j--;
  414 //      fprintf(stdout,"skipping hi %s\n",
  415 //          (const char *)hi->name());
  416         hi=iprev(hi);
  417     }
  418     if (dpp(i,j) == 0)
  419     {
  420 //      fprintf(stdout,"linking %s %s\n",
  421 //          (const char *)ri->name(),
  422 //          (const char *)hi->name());
  423         append_daughter(ri,alignrel,hi);
  424         j--;
  425         hi=iprev(hi);
  426     }
  427 //  else
  428 //      fprintf(stdout,"skipping ri %s\n",
  429 //          (const char *)ri->name());
  430     }
  431 }