"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/grammar/scfg/EST_SCFG_Chart.cc" (4 Sep 2017, 15071 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "EST_SCFG_Chart.cc" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*                Centre for Speech Technology Research                  */
    4 /*                     University of Edinburgh, UK                       */
    5 /*                         Copyright (c) 1997                            */
    6 /*                        All Rights Reserved.                           */
    7 /*                                                                       */
    8 /*  Permission is hereby granted, free of charge, to use and distribute  */
    9 /*  this software and its documentation without restriction, including   */
   10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
   12 /*  permit persons to whom this work is furnished to do so, subject to   */
   13 /*  the following conditions:                                            */
   14 /*   1. The code must retain the above copyright notice, this list of    */
   15 /*      conditions and the following disclaimer.                         */
   16 /*   2. Any modifications must be clearly marked as such.                */
   17 /*   3. Original authors' names are not deleted.                         */
   18 /*   4. The authors' names are not used to endorse or promote products   */
   19 /*      derived from this software without specific prior written        */
   20 /*      permission.                                                      */
   21 /*                                                                       */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30 /*  THIS SOFTWARE.                                                       */
   31 /*                                                                       */
   32 /*************************************************************************/
   33 /*             Author :  Alan W Black                                    */
   34 /*             Date   :  June 1997                                       */
   35 /*-----------------------------------------------------------------------*/
   36 /*                                                                       */
   37 /* A SCFG chart parser                                                   */
   38 /*                                                                       */
   39 /*=======================================================================*/
   40 #include <cstdlib>
   41 #include "siod.h"
   42 #include "EST_math.h"
   43 #include "EST_SCFG.h"
   44 #include "EST_SCFG_Chart.h"
   45 
   46 EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge()
   47 {
   48 }
   49 
   50 EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge(double prob,
   51                      int d1, int d2,
   52                      int pos)
   53 {
   54     p_d1 = d1;
   55     p_d2 = d2;
   56     p_pos = pos;
   57     p_prob = prob;
   58 }
   59 
   60 EST_SCFG_Chart_Edge::~EST_SCFG_Chart_Edge()
   61 {
   62 }
   63 
   64 LISP EST_SCFG_Chart::print_edge(int start, int end, int p,
   65                 EST_SCFG_Chart_Edge *e)
   66 {
   67     // Return a lisp representation of the edge
   68 
   69     if (e->prob() == 0)
   70     {
   71     return NIL;  // failed
   72     }
   73     else if (start+1 == end)
   74     {
   75     // unary rule, preterminal
   76     LISP r = cons(rintern(grammar->nonterminal(p)),
   77          cons(flocons(e->prob()),
   78               cons(flocons(start),
   79            cons(flocons(end),
   80             cons(rintern(grammar->terminal(e->d1())),
   81              NIL)))));
   82     return r;
   83     }
   84     else 
   85     {
   86     //name prob start end daughters
   87     EST_SCFG_Chart_Edge *d1, *d2;
   88 
   89     d1 = edges[start][e->pos()][e->d1()];
   90     d2 = edges[e->pos()][end][e->d2()];
   91 
   92     LISP daughters = 
   93         cons(print_edge(start,e->pos(),e->d1(),d1),
   94          cons(print_edge(e->pos(),end,e->d2(),d2),
   95               NIL));
   96     LISP r = cons(rintern(grammar->nonterminal(p)),
   97          cons(flocons(e->prob()),
   98               cons(flocons(start),
   99                cons(flocons(end),
  100                 daughters))));
  101     return r;
  102     }
  103 }
  104 
  105 EST_SCFG_Chart::EST_SCFG_Chart()
  106 {
  107     n_vertices = 0;
  108     edges = 0;
  109     wfst = 0;
  110     emptyedge = 0;
  111     grammar_local = TRUE;
  112     grammar = new EST_SCFG;
  113 }
  114 
  115 EST_SCFG_Chart::~EST_SCFG_Chart()
  116 {
  117     // delete all the vertices
  118     
  119     delete_edge_table();
  120     if (grammar_local)
  121     delete grammar;
  122 }
  123 
  124 void EST_SCFG_Chart::set_grammar_rules(EST_SCFG &imported_grammar)
  125 {
  126     if (grammar_local)
  127     delete grammar;
  128     grammar_local = FALSE;
  129     grammar = &imported_grammar;
  130 }
  131 
  132 void EST_SCFG_Chart::set_grammar_rules(LISP r)
  133 { 
  134     grammar->set_rules(r); 
  135 }
  136 
  137 void EST_SCFG_Chart::setup_wfst(EST_Relation *s,const EST_String &name)
  138 {
  139     // Set up well formed substring table from feature name in each
  140     // stream item in s
  141     setup_wfst(s->head(),0,name);
  142 }
  143 
  144 void EST_SCFG_Chart::setup_wfst(EST_Item *s, EST_Item *e,const EST_String &name)
  145 {
  146     // Set up well formed substring table from feature name in each
  147     // stream item in s
  148     EST_Item *p;
  149     int n;
  150 
  151     delete_edge_table();
  152     for (n_vertices=1,p=s; p != e; p=inext(p))
  153     n_vertices++;
  154     setup_edge_table();
  155 
  156     for (n=0,p=s; p != e; p=inext(p),n++)
  157     {
  158     int term = grammar->terminal(p->f(name).string());
  159     if (term == -1)
  160     {
  161         cerr << "SCFG_Chart: unknown terminal symbol \"" <<
  162         p->f(name).string() << "\"" << endl;
  163         term = 0;  // to avoid crashing
  164     }
  165     wfst[n] = new EST_SCFG_Chart_Edge(1.0,term,0,-1);
  166     }
  167 }
  168 
  169 void EST_SCFG_Chart::delete_edge_table()
  170 {
  171     int i,j,k;
  172 
  173     if (wfst == 0) return;
  174 
  175     for (i=0; i < n_vertices; i++)
  176     {
  177     delete wfst[i];
  178     for (j=0; j < n_vertices; j++)
  179     {
  180         for (k=0; k < grammar->num_nonterminals(); k++)
  181         if (edges[i][j][k] != emptyedge)
  182             delete edges[i][j][k];
  183         delete [] edges[i][j];
  184     }
  185     delete [] edges[i];
  186     }
  187     delete [] wfst;
  188     delete [] edges;
  189     delete emptyedge;
  190 
  191     wfst = 0;
  192     edges = 0;
  193 
  194 }
  195 
  196 void EST_SCFG_Chart::setup_edge_table()
  197 {
  198     int i,j,k;
  199     int nt = grammar->num_nonterminals();
  200 
  201     wfst = new EST_SCFG_Chart_Edge*[n_vertices];
  202     edges = new EST_SCFG_Chart_Edge ***[n_vertices];
  203     emptyedge = new EST_SCFG_Chart_Edge(0,0,0,0);
  204 
  205     for (i=0; i < n_vertices; i++)
  206     {
  207     wfst[i] = 0;
  208     edges[i] = new EST_SCFG_Chart_Edge **[n_vertices];
  209     for (j=0; j < n_vertices; j++)
  210     {
  211         edges[i][j] = new EST_SCFG_Chart_Edge *[nt];
  212         for (k=0; k < nt; k++)
  213         edges[i][j][k] = 0;
  214     }
  215     }
  216 }
  217 
  218 double EST_SCFG_Chart::find_best_tree_cal(int start,int end,int p)
  219 {
  220     // Find the best parse for non/terminal p between start and end
  221     int best_j = -1;
  222     int best_q = -1, best_r = -1;
  223     double best_prob = 0;
  224 
  225     if (end-1 == start)
  226     {
  227     best_prob = grammar->prob_U(p,wfst[start]->d1());
  228     if (best_prob > 0)
  229         edges[start][end][p] = 
  230         new EST_SCFG_Chart_Edge(best_prob*wfst[start]->prob(),
  231                     wfst[start]->d1(),0,-1);
  232     else
  233         edges[start][end][p] = emptyedge;
  234     return best_prob;
  235     }
  236     else
  237     {
  238     // for all rules expanding p find total and best prob
  239     double s=0,t_prob,left,right;
  240     int j,q,r;
  241     int nt = grammar->num_nonterminals();
  242 
  243     for (q=0; q < nt; q++)
  244         for (r=0; r < nt; r++)
  245         {
  246         double pBpqr = grammar->prob_B(p,q,r);
  247         if (pBpqr > 0)
  248         {
  249             for (j=start+1; j < end; j++)
  250             {
  251             left=find_best_tree(start,j,q);
  252             if (left > 0)
  253             {
  254                 right = find_best_tree(j,end,r);
  255                 t_prob =  pBpqr * left * right;
  256                 if (t_prob > best_prob)
  257                 {
  258                 best_prob = t_prob;
  259                 best_q = q;
  260                 best_r = r;
  261                 best_j = j;
  262                 }
  263                 s += t_prob;
  264             }
  265             }
  266         }
  267         }
  268 
  269     if (best_prob > 0)
  270         edges[start][end][p] = 
  271         new EST_SCFG_Chart_Edge(s,best_q,best_r,best_j);
  272     else
  273         edges[start][end][p] = emptyedge;
  274     return s;
  275     }
  276 }
  277 
  278 void EST_SCFG_Chart::parse(void)
  279 {
  280     // do the parsing, find best parse only
  281     if (n_vertices - 1 > 0)
  282         find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
  283 
  284 }
  285 
  286 LISP EST_SCFG_Chart::find_parse()
  287 {
  288     // Extract the parse from the edge table
  289     EST_SCFG_Chart_Edge *top;
  290 
  291     top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
  292 
  293     if (top == 0)
  294     return NIL;   // no parse
  295     else
  296     return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
  297 }
  298 
  299 void EST_SCFG_Chart::extract_parse(EST_Relation *syn,
  300                    EST_Relation *word,
  301                    int force)
  302 {
  303     // Build a tree stream in Syn linking the leafs of Syn to those
  304     // in word, force guarantees a parse is necessary (though probably
  305     // a random one)
  306 
  307     extract_parse(syn,word->head(),0,force);
  308 }
  309 
  310 void EST_SCFG_Chart::extract_parse(EST_Relation *syn,
  311                    EST_Item *s, EST_Item *e, int force)
  312 {
  313     // Build a tree stream in Syn linking the leafs of Syn to those
  314     // in word
  315     EST_Item *p;
  316     int num_words;
  317 
  318     for (num_words=0,p=s; p != e; p=inext(p))
  319     num_words++;
  320 
  321     if (num_words != (n_vertices-1))
  322     {
  323     cerr << "SCFG_Chart: extract_parse, number of items in link stream " <<
  324         " different from those in parse tree" << endl;
  325     return;
  326     }
  327 
  328     EST_SCFG_Chart_Edge *top;
  329     EST_Item *w = s;
  330 
  331     top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
  332 
  333     if (top == 0)
  334     return;   // failed to parse so no parse tree
  335     else
  336     {
  337     EST_Item *ss = syn->append();
  338     
  339     extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
  340              ss,&w);
  341 
  342     if ((force) && (!daughter1(ss)))  // no parse found but *need* one
  343         extract_forced_parse(0,n_vertices-1,ss,w);
  344     return;
  345     }
  346 }
  347 
  348 void EST_SCFG_Chart::extract_forced_parse(int start, int end,
  349                       EST_Item *s, EST_Item *w)
  350 {
  351     // Extract a parse even though one wasn't found (often happens
  352     // with single word or dual word sentences.
  353     
  354     if (start+1 == end)
  355     {
  356     s->append_daughter(w);
  357     s->set_name(grammar->nonterminal(grammar->distinguished_symbol()));
  358     s->set("prob",0.0);  // maybe should be epsilon
  359     }
  360     else
  361     {
  362     extract_forced_parse(start,start+1,s->append_daughter(),w);
  363     EST_Item *st = s->append_daughter();
  364     st->set_name(grammar->nonterminal(grammar->distinguished_symbol()));
  365     st->set("prob",0.0);  // maybe should be epsilon
  366     EST_Item *nw = inext(w);
  367     extract_forced_parse(start+1,end,st,nw);
  368     }
  369 }
  370 
  371 void EST_SCFG_Chart::extract_edge(int start, int end, int p,
  372                   EST_SCFG_Chart_Edge *e,
  373                   EST_Item *s,
  374                   EST_Item **word)
  375 {
  376     // Build the node for this edge, and all of its daughters
  377 
  378     if (e->prob() == 0)
  379     {
  380     return;  // failed
  381     }
  382     else if (start+1 == end)
  383     {
  384     // unary rule, preterminal
  385     s->append_daughter((*word));
  386     s->set_name(grammar->nonterminal(p));   
  387     s->set("prob",(float)e->prob());
  388     *word = inext(*word);  // increment along "word" stream
  389     return;
  390     }
  391     else 
  392     {
  393     //name prob start end daughters
  394     EST_SCFG_Chart_Edge *d1, *d2;
  395 
  396     d1 = edges[start][e->pos()][e->d1()];
  397     d2 = edges[e->pos()][end][e->d2()];
  398 
  399     // Inserts the new nodes in the tree (and creates new si nodes)
  400     s->append_daughter();
  401     s->append_daughter();
  402 
  403     extract_edge(start,e->pos(),e->d1(),d1,daughter1(s),word);
  404     extract_edge(e->pos(),end,e->d2(),d2,daughter2(s),word);
  405 
  406     s->set_name(grammar->nonterminal(p));   
  407     s->set("prob",(float)e->prob());
  408 
  409     return;
  410     }
  411 }
  412 
  413 void EST_SCFG_chart_load_relation(EST_Relation &s,LISP sent)
  414 {
  415     // Set up well formed substring table form lisp list
  416     // Setup a relation and call the standard method of set up
  417     LISP w,f;
  418 
  419     for (w=sent; w != NIL; w=cdr(w))
  420     {
  421     EST_Item *word = s.append();
  422     
  423     if (consp(car(w)))
  424     {   // a word with other feature info
  425         word->set_name(get_c_string(car(car(w))));
  426         if (consp(car(cdr(car(w)))))
  427         for (f=car(cdr(car(w))); f != NIL; f=cdr(f))
  428         {
  429             if (FLONUMP(car(cdr(car(f)))))
  430             word->set(get_c_string(car(car(f))),
  431                   get_c_float(car(cdr(car(f)))));
  432             else 
  433             word->set(get_c_string(car(car(f))),
  434                   get_c_string(car(cdr(car(f)))));
  435         }
  436         else // we assume its a POS value, cause they didn't say
  437         word->set("name",get_c_string(car(cdr(car(w)))));
  438     }
  439     else // for easy we set the pos field to the be the name
  440         word->set("name",get_c_string(car(w)));
  441     }
  442 }
  443 
  444 void scfg_parse(EST_Relation *Word, const EST_String &name, 
  445         EST_Relation *Syntax, EST_SCFG &grammar)
  446 {
  447     // Parse feature name in Word to build Syntax relation
  448     // The relations names above are *not* the names of the relations
  449     // just named to reflect there conceptual usage
  450     EST_SCFG_Chart chart;
  451 
  452     chart.set_grammar_rules(grammar);
  453     chart.setup_wfst(Word,name);
  454     chart.parse();
  455     chart.extract_parse(Syntax,Word,TRUE);
  456 
  457     return;
  458 }
  459 
  460 LISP scfg_parse(LISP string, LISP grammar)
  461 {
  462     // Parse and return full parse
  463     EST_SCFG_Chart chart;
  464     EST_Relation words;
  465     LISP parse;
  466 
  467     chart.set_grammar_rules(grammar);
  468 
  469     EST_SCFG_chart_load_relation(words,string);
  470     chart.setup_wfst(&words,"name");
  471     chart.parse();
  472     parse = chart.find_parse();
  473 
  474     return parse;
  475 }
  476 
  477 LISP scfg_parse(LISP string, EST_SCFG &grammar)
  478 {
  479     // Parse and return full parse
  480     EST_SCFG_Chart chart;
  481     EST_Relation words;
  482     LISP parse;
  483 
  484     chart.set_grammar_rules(grammar);
  485 
  486     EST_SCFG_chart_load_relation(words,string);
  487     chart.setup_wfst(&words,"name");
  488     chart.parse();
  489     parse = chart.find_parse();
  490 
  491     return parse;
  492 }
  493 
  494 LISP scfg_bracketing_only(LISP parse)
  495 {
  496     if (consp(siod_nth(4,parse)))
  497     {
  498     LISP d,ds;
  499 
  500     for (d=cdr(cdr(cdr(cdr(parse)))),ds=NIL; d != NIL; d=cdr(d))
  501         ds = cons(scfg_bracketing_only(car(d)),ds);
  502     return reverse(ds);
  503     }
  504     else
  505     return siod_nth(4,parse);
  506 
  507 }
  508 
  509 void EST_SCFG_traintest::test_crossbrackets()
  510 {
  511     // Compare bracketing of best parse to bracketing on original
  512     // For each sentence parse it (unbracketed) and then
  513     // find the percentage of valid brackets in parsed version that
  514     // are valid in the original one.
  515     int c;
  516     LISP parse;
  517     EST_SuffStats cb;
  518     int failed = 0;
  519     int fully_contained=0;
  520 
  521     for (c=0; c < corpus.length(); c++)
  522     {
  523     LISP flat = siod_flatten(corpus.a_no_check(c).string());
  524     
  525     parse =  scfg_parse(flat,*this);
  526     if (parse == NIL)
  527     {
  528         failed++;
  529         continue;
  530     }
  531 
  532     EST_bracketed_string parsed(scfg_bracketing_only(parse));
  533     EST_SuffStats vs;
  534 
  535     count_bracket_crossing(corpus.a_no_check(c),parsed,vs);
  536 
  537     if (vs.mean() == 1)
  538         fully_contained++;
  539     cb += vs.mean();
  540     }
  541 
  542     cout << "cross bracketing " << cb.mean()*100 << " (" << failed << 
  543     " failed " << (float)(100.0*fully_contained)/corpus.length() <<
  544         "% fully consistent from " << corpus.length() 
  545         << " sentences)" << endl;
  546 
  547 }
  548 
  549 void count_bracket_crossing(const EST_bracketed_string &ref,
  550                 const EST_bracketed_string &test,
  551                 EST_SuffStats &vs)
  552 {
  553     int i,j;
  554 
  555     if (ref.length() != test.length())
  556     {
  557     EST_error("bracket_crossing: sentences of different lengths");
  558     }
  559 
  560     for (i=0; i < ref.length(); i++)
  561     for (j=i+1; j <= ref.length(); j++)
  562         if (test.valid(i,j) == 1)
  563         {
  564         if (ref.valid(i,j) == 0)
  565             vs += 0;
  566         else
  567             vs += 1;
  568         }
  569 }