"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/ling_class/apml.cc" (4 Sep 2017, 11577 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "apml.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1  /************************************************************************/
    2  /*                                                                      */
    3  /*                Centre for Speech Technology Research                 */
    4  /*                     University of Edinburgh, UK                      */
    5  /*                       Copyright (c) 2002                             */
    6  /*                        All Rights Reserved.                          */
    7  /*                                                                      */
    8  /*  Permission is hereby granted, free of charge, to use and distribute */
    9  /*  this software and its documentation without restriction, including  */
   10  /*  without limitation the rights to use, copy, modify, merge, publish, */
   11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
   12  /*  permit persons to whom this work is furnished to do so, subject to  */
   13  /*  the following conditions:                                           */
   14  /*   1. The code must retain the above copyright notice, this list of   */
   15  /*      conditions and the following disclaimer.                        */
   16  /*   2. Any modifications must be clearly marked as such.               */
   17  /*   3. Original authors' names are not deleted.                        */
   18  /*   4. The authors' names are not used to endorse or promote products  */
   19  /*      derived from this software without specific prior written       */
   20  /*      permission.                                                     */
   21  /*                                                                      */
   22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
   23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
   24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
   25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
   26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
   27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
   28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
   29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
   30  /*  THIS SOFTWARE.                                                      */
   31  /*                                                                      */
   32  /*************************************************************************/
   33  /*                                                                       */
   34  /*                 Author: Rob Clark  (robert@cstr.ed.ac.uk)             */
   35  /* --------------------------------------------------------------------  */
   36  /* Code to read APML format XML as utterances.                           */
   37  /*                                                                       */
   38  /*************************************************************************/
   39 
   40 #include <cstdlib>
   41 #include <cstdio>
   42 #include "EST_THash.h"
   43 #include "EST_error.h"
   44 #include "apml.h"
   45 #include "rxp/XML_Parser.h"
   46 
   47 static EST_Regex simpleIDRegex(".*#id(w\\([0-9]+\\))");
   48 static EST_Regex rangeIDRegex(".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
   49 static EST_Regex RXpunc("[\\.,\\?\\!\"]+");
   50 
   51 class Parse_State
   52   {
   53 public:
   54     int depth;
   55     int maxid;
   56     EST_Utterance *utt;
   57     EST_Relation *tokens;
   58     EST_Relation *perf;
   59     EST_Relation *com;
   60     EST_Relation *semstruct;
   61     EST_Relation *emphasis;
   62     EST_Relation *boundary;
   63     EST_Relation *pause;
   64     EST_Item *parent;
   65     EST_Item *pending;
   66     EST_Item *last_token;
   67   };
   68 
   69 class Apml_Parser_Class : public XML_Parser_Class
   70 {
   71 protected:
   72   virtual void document_open(XML_Parser_Class &c,
   73             XML_Parser &p,
   74             void *data);
   75   virtual void document_close(XML_Parser_Class &c,
   76              XML_Parser &p,
   77              void *data);
   78   
   79   virtual void element_open(XML_Parser_Class &c,
   80                XML_Parser &p,
   81                void *data,
   82                const char *name,
   83                XML_Attribute_List &attributes);
   84   virtual void element(XML_Parser_Class &c,
   85           XML_Parser &p,
   86           void *data,
   87           const char *name,
   88           XML_Attribute_List &attributes);
   89   virtual void element_close(XML_Parser_Class &c,
   90             XML_Parser &p,
   91             void *data,
   92             const char *name);
   93 
   94   virtual void pcdata(XML_Parser_Class &c,
   95          XML_Parser &p,
   96          void *data,
   97          const char *chars);
   98   virtual void cdata(XML_Parser_Class &c,
   99         XML_Parser &p,
  100         void *data,
  101         const char *chars);
  102 
  103   virtual void processing(XML_Parser_Class &c,
  104              XML_Parser &p,
  105              void *data,
  106              const char *instruction);
  107   virtual void error(XML_Parser_Class &c,
  108         XML_Parser &p,
  109         void *data);
  110 };
  111 
  112 static void print_attributes(XML_Attribute_List &attributes)
  113 {
  114   XML_Attribute_List::Entries them;
  115 
  116   for(them.begin(attributes); them ; them++)
  117     printf(" %s='%s'", 
  118        (const char *)them->k, 
  119        (const char *)them->v);
  120 }
  121 
  122 EST_read_status apml_read(FILE *file, 
  123                  const EST_String &name,
  124                  EST_Utterance &u,
  125                  int &max_id)
  126 {
  127   (void)max_id;
  128   (void)print_attributes;   // just to shut -Wall up.
  129   Apml_Parser_Class pclass;
  130   Parse_State state;
  131 
  132   u.clear();
  133 
  134   state.utt=&u;
  135 
  136   XML_Parser *parser = pclass.make_parser(file, name, &state);
  137   parser->track_context(TRUE);
  138 
  139   CATCH_ERRORS()
  140     return read_format_error;
  141 
  142   parser->go();
  143 
  144   END_CATCH_ERRORS();
  145 
  146   return read_ok;
  147 }
  148 
  149 
  150 
  151 /** Now we define the callbacks.
  152   */
  153 
  154 void Apml_Parser_Class::document_open(XML_Parser_Class &c,
  155               XML_Parser &p,
  156               void *data)
  157 {
  158   (void)c; (void)p; 
  159   Parse_State *state = (Parse_State *)data;
  160 
  161   state->maxid=0;
  162 
  163   state->depth=1;
  164   state->parent=NULL;
  165   state->pending=NULL;
  166   state->last_token=NULL;
  167 
  168   // create relations:
  169   state->perf = state->utt->create_relation("Perfomative");
  170   state->com = state->utt->create_relation("Communicative");
  171   state->tokens = state->utt->create_relation("Token");
  172   state->semstruct = state->utt->create_relation("SemStructure");
  173   state->emphasis = state->utt->create_relation("Emphasis");
  174   state->boundary = state->utt->create_relation("Boundary");
  175   state->pause = state->utt->create_relation("Pause");
  176 
  177 
  178 }
  179 
  180 void Apml_Parser_Class::document_close(XML_Parser_Class &c,
  181             XML_Parser &p,
  182             void *data)
  183 {
  184   (void)c; (void)p; (void)data;
  185 }
  186 
  187 
  188 void Apml_Parser_Class::element_open(XML_Parser_Class &c,
  189           XML_Parser &p,
  190           void *data,
  191           const char *name,
  192           XML_Attribute_List &attributes)
  193 {
  194   (void)c; (void)p; (void)attributes;
  195   Parse_State *state = (Parse_State *)data;
  196 
  197   //cout << " In element_open: " << name << "\n";
  198 
  199   if (strcmp(name, "turnallocation")==0)
  200     {
  201       // currently ignore
  202       return;
  203     }
  204 
  205   if (strcmp(name, "apml")==0) 
  206     return;  // ignore
  207 
  208   state->depth++;
  209 
  210   if( strcmp(name, "performative")==0
  211       || strcmp(name, "rheme")==0
  212       || strcmp(name, "theme")==0
  213       || strcmp(name, "emphasis")==0
  214       || strcmp(name, "boundary")==0
  215       || strcmp(name, "pause")==0)
  216     {
  217       
  218       // create new item content
  219       EST_Item_Content *cont = new EST_Item_Content();
  220       cont->set_name(name);
  221       
  222       XML_Attribute_List::Entries them;
  223       for(them.begin(attributes); them ; them++)
  224     {
  225       EST_String k = them->k;
  226       EST_String v = them->v;
  227       cont->f.set(k,v);
  228     }
  229 
  230       EST_Item *item;
  231       
  232       if( strcmp(name, "emphasis")==0 )
  233     {
  234       item = state->emphasis->append();
  235       state->pending = item;
  236     }
  237       else if(strcmp(name, "boundary")==0 )
  238     {
  239       item = state->boundary->append();
  240       if(state->last_token)
  241         item->append_daughter(state->last_token);
  242     }
  243       else if(strcmp(name, "pause")==0 )
  244     {
  245       item = state->pause->append();
  246       if(state->last_token)
  247         item->append_daughter(state->last_token);
  248     }
  249       else
  250     {
  251       if (state->parent == NULL)
  252         item = state->semstruct->append();
  253       else
  254         item = state->parent->append_daughter();
  255       state->parent=item;
  256     }
  257 
  258       item->set_contents(cont);
  259       
  260             
  261     }
  262   else
  263     EST_warning("APML Parser: unknown element %s", name);
  264 }
  265 
  266 
  267 void Apml_Parser_Class::element(XML_Parser_Class &c,
  268                 XML_Parser &p,
  269                 void *data,
  270                 const char *name,
  271                 XML_Attribute_List &attributes)
  272 {
  273   (void)c; (void)p; (void)attributes;
  274 
  275   element_open(c, p, data, name, attributes);
  276   element_close(c, p, data, name);
  277 }
  278 
  279 
  280 void Apml_Parser_Class::element_close(XML_Parser_Class &c,
  281            XML_Parser &p,
  282            void *data,
  283            const char *name)
  284 {
  285   (void)c; (void)p; (void)name;
  286   Parse_State *state = (Parse_State *)data;
  287 
  288   if ( strcmp(name, "emphasis")==0
  289        || strcmp(name, "boundary")==0 
  290        || strcmp(name, "pause")==0 )
  291     {
  292       state->depth--;
  293       state->pending=NULL;
  294     }
  295 
  296 
  297   if (strcmp(name, "performative")==0 
  298       || strcmp(name, "theme")==0
  299       || strcmp(name, "rheme")==0)
  300     {
  301       state->depth--;
  302       state->pending = NULL;
  303       state->parent=iup(state->parent);
  304     }
  305 }
  306 
  307 
  308 void Apml_Parser_Class::pcdata(XML_Parser_Class &c,
  309         XML_Parser &p,
  310         void *data,
  311         const char *chars)
  312 {
  313   (void)c; 
  314   
  315  Parse_State *state = (Parse_State *)data;
  316  EST_String strings[255];
  317 
  318  split(chars,strings,255,RXwhite);
  319  
  320  //   for(int cc=0 ; cc < 20 ; ++cc)
  321  //  cout << cc << ": \"" << strings[cc] << "\" (" << strings[cc].length() << ")\n";
  322 
  323  int s=0;
  324 
  325  while( s < 1 || strings[s].length() > 0 )
  326    {
  327      if(strings[s].length() > 0 )
  328        {
  329      // Just Punctuation
  330      if(strings[s].matches(RXpunc))
  331        {
  332          state->last_token->set("punc",strings[s]);
  333        }
  334      // Text and possibly punc
  335      else      
  336        {
  337          EST_Item_Content *cont = new EST_Item_Content();
  338          EST_Item *item;
  339          
  340          if (state->parent == NULL)
  341            item = state->semstruct->append();
  342          else
  343            item = state->parent->append_daughter();
  344          item->set_contents(cont);
  345          
  346          // strip pre-punc here.
  347          int i = strings[s].index(RXpunc);
  348          EST_String ps = strings[s].at(RXpunc);
  349          EST_String intermediate;
  350          if( ps.length() > 0 && i == 0)
  351            {
  352          cout << "Got pre punc: " << ps << endl;
  353          intermediate = strings[s].after(RXpunc);
  354          // cont->set_name(strings[s].before(RXpunc));
  355          item->set("prepunctuation",ps);
  356            }
  357          else
  358            {
  359          intermediate = strings[s];
  360          item->set("prepunctuation","");
  361            }
  362          // now strip punc
  363          ps = intermediate.at(RXpunc);
  364          if( ps.length() > 0 )
  365            {
  366          cout << "Got punc: " << ps << endl;
  367          cont->set_name(intermediate.before(RXpunc));
  368          item->set("punc",ps);
  369            }
  370          else
  371            {
  372          cont->set_name(intermediate);
  373          item->set("punc","");
  374            }
  375 
  376        state->tokens->append(item);
  377        state->last_token = item;
  378        
  379        if(state->pending)
  380          {
  381            state->pending->append_daughter(item);
  382          }
  383        
  384        //  if (state->parent != NULL && p.context(0) == "w")
  385        //  state->parent->set(EST_String("token"), chars);
  386        
  387        //cout << "  got token: " << item->name() << "\n";
  388        }
  389        }
  390      ++s;
  391    }
  392 }
  393 
  394 
  395 void Apml_Parser_Class::cdata(XML_Parser_Class &c,
  396        XML_Parser &p,
  397        void *data,
  398        const char *chars)
  399 {
  400   (void)c; (void)p; (void)data; (void)chars;
  401   // Parse_State *state = (Parse_State *)data;
  402 
  403   //   printf("APML XML Parser [cdata[%s]] %d\n", chars, state->depth);
  404 }
  405 
  406 
  407 void Apml_Parser_Class::processing(XML_Parser_Class &c,
  408         XML_Parser &p,
  409         void *data,
  410         const char *instruction)
  411 {
  412   (void)c; (void)p; 
  413   Parse_State *state = (Parse_State *)data;
  414 
  415   printf("APML XML Parser [proc[%s]] %d\n", instruction, state->depth);
  416 }
  417 
  418 
  419 void Apml_Parser_Class::error(XML_Parser_Class &c,
  420        XML_Parser &p,
  421        void *data)
  422 {
  423   (void)c; (void)p;  (void)data;
  424   // Parse_State *state = (Parse_State *)data;
  425 
  426   EST_error("APML Parser %s", get_error(p));
  427 
  428   est_error_throw();
  429 }
  430 
  431 
  432 
  433 
  434 
  435 
  436