"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/ling_class/solexml.cc" (4 Sep 2017, 11121 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "solexml.cc" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1  /************************************************************************/
    2  /*                                                                      */
    3  /*                Centre for Speech Technology Research                 */
    4  /*                     University of Edinburgh, UK                      */
    5  /*                       Copyright (c) 1996,1997                        */
    6  /*                        All Rights Reserved.                          */
    7  /*                                                                      */
    8  /*  Permission is hereby granted, free of charge, to use and distribute */
    9  /*  this software and its documentation without restriction, including  */
   10  /*  without limitation the rights to use, copy, modify, merge, publish, */
   11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
   12  /*  permit persons to whom this work is furnished to do so, subject to  */
   13  /*  the following conditions:                                           */
   14  /*   1. The code must retain the above copyright notice, this list of   */
   15  /*      conditions and the following disclaimer.                        */
   16  /*   2. Any modifications must be clearly marked as such.               */
   17  /*   3. Original authors' names are not deleted.                        */
   18  /*   4. The authors' names are not used to endorse or promote products  */
   19  /*      derived from this software without specific prior written       */
   20  /*      permission.                                                     */
   21  /*                                                                      */
   22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
   23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
   24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
   25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
   26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
   27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
   28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
   29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
   30  /*  THIS SOFTWARE.                                                      */
   31  /*                                                                      */
   32  /*************************************************************************/
   33  /*                                                                       */
   34  /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)             */
   35  /* --------------------------------------------------------------------  */
   36  /* Code to read SOLE format XML as utterances.                           */
   37  /*                                                                       */
   38  /*************************************************************************/
   39 
   40 #include <cstdlib>
   41 #include <cstdio>
   42 #include "EST_THash.h"
   43 #include "EST_error.h"
   44 #include "solexml.h"
   45 #include "rxp/XML_Parser.h"
   46 
   47 static EST_Regex simpleIDRegex(".*#id(w\\([0-9]+\\))");
   48 static EST_Regex rangeIDRegex(".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
   49 
   50 class Parse_State
   51   {
   52 public:
   53     int depth;
   54     EST_String relName;
   55     EST_Utterance *utt;
   56     EST_Relation *rel;
   57     EST_Item *parent;
   58     EST_Item *current;
   59 
   60     EST_THash<EST_String, EST_Item_Content *> contents;
   61 
   62     Parse_State() : contents(100) {}
   63   };
   64 
   65 class Sole_Parser_Class : public XML_Parser_Class
   66 {
   67 protected:
   68   virtual void document_open(XML_Parser_Class &c,
   69             XML_Parser &p,
   70             void *data);
   71   virtual void document_close(XML_Parser_Class &c,
   72              XML_Parser &p,
   73              void *data);
   74   
   75   virtual void element_open(XML_Parser_Class &c,
   76                XML_Parser &p,
   77                void *data,
   78                const char *name,
   79                XML_Attribute_List &attributes);
   80   virtual void element(XML_Parser_Class &c,
   81           XML_Parser &p,
   82           void *data,
   83           const char *name,
   84           XML_Attribute_List &attributes);
   85   virtual void element_close(XML_Parser_Class &c,
   86             XML_Parser &p,
   87             void *data,
   88             const char *name);
   89 
   90   virtual void pcdata(XML_Parser_Class &c,
   91          XML_Parser &p,
   92          void *data,
   93          const char *chars);
   94   virtual void cdata(XML_Parser_Class &c,
   95         XML_Parser &p,
   96         void *data,
   97         const char *chars);
   98 
   99   virtual void processing(XML_Parser_Class &c,
  100              XML_Parser &p,
  101              void *data,
  102              const char *instruction);
  103   virtual void error(XML_Parser_Class &c,
  104         XML_Parser &p,
  105         void *data);
  106 };
  107 
  108 static void print_attributes(XML_Attribute_List &attributes)
  109 {
  110   XML_Attribute_List::Entries them;
  111 
  112   for(them.begin(attributes); them ; them++)
  113     printf(" %s='%s'", 
  114        (const char *)them->k, 
  115        (const char *)them->v);
  116 }
  117 
  118 EST_read_status solexml_read(FILE *file, 
  119                  const EST_String &name,
  120                  EST_Utterance &u,
  121                  int &max_id)
  122 {
  123   (void)max_id;
  124   (void)print_attributes;   // just to shut -Wall up.
  125   Sole_Parser_Class pclass;
  126   Parse_State state;
  127 
  128   u.clear();
  129 
  130   state.utt=&u;
  131 
  132   XML_Parser *parser = pclass.make_parser(file, name, &state);
  133   parser->track_context(TRUE);
  134 
  135   CATCH_ERRORS()
  136     return read_format_error;
  137 
  138   parser->go();
  139 
  140   END_CATCH_ERRORS();
  141 
  142   return read_ok;
  143 }
  144 
  145 static void ensure_relation(Parse_State *state)
  146 {
  147   if (state->rel==NULL)
  148     {
  149       state->rel = state->utt->create_relation(state->relName);
  150     }
  151 }
  152 
  153 static EST_Item_Content *get_contents(Parse_State *state, EST_String id)
  154 {
  155   EST_Item_Content *c = state->contents.val(id);
  156   if (c==NULL)
  157     {
  158       c = new EST_Item_Content();
  159       state->contents.add_item(id, c);
  160     }
  161 
  162   return c;
  163 }
  164 
  165 static void extract_ids(XML_Attribute_List &attributes, 
  166             EST_TList<EST_String> &ids)
  167 {
  168   EST_String val;
  169   static int count;
  170   if (attributes.present("id"))
  171     {
  172       val = attributes.val("id");
  173       ids.append(val);
  174     }
  175   else if (attributes.present("href"))
  176     {
  177       val = attributes.val("href");
  178       int starts[EST_Regex_max_subexpressions];
  179       int ends[EST_Regex_max_subexpressions];
  180       
  181       if (val.matches(simpleIDRegex, 0, starts, ends))
  182     {
  183       EST_String n = val.at(starts[1], ends[1]-starts[1]);
  184       
  185       ids.append("w" + n);
  186     }
  187       else if (val.matches(rangeIDRegex, 0, starts, ends))
  188     {
  189       int n1 = atoi(val.at(starts[1], ends[1]-starts[1]));
  190       int n2 = atoi(val.at(starts[2], ends[2]-starts[2]));
  191       
  192       for(int i=n1; i<=n2; i++)
  193         {
  194           char buf[100];
  195           sprintf(buf, "w%d", i);
  196           
  197           ids.append(buf);
  198         }
  199       
  200     }
  201       else
  202     EST_warning("element with bad ID or HREF '%s'", (const char *)val);
  203     }
  204   else
  205     {
  206       char buf[100];
  207       sprintf(buf, "n%d", ++count);
  208       
  209       ids.append(buf);
  210       return;
  211     } 
  212 
  213 }
  214 
  215 
  216 /** Now we define the callbacks.
  217   */
  218 
  219 void Sole_Parser_Class::document_open(XML_Parser_Class &c,
  220               XML_Parser &p,
  221               void *data)
  222 {
  223   (void)c; (void)p; 
  224   Parse_State *state = (Parse_State *)data;
  225 
  226   state->depth=1;
  227   state->rel=NULL;
  228   state->parent=NULL;
  229   state->current=NULL;
  230 }
  231 
  232 void Sole_Parser_Class::document_close(XML_Parser_Class &c,
  233             XML_Parser &p,
  234             void *data)
  235 {
  236   (void)c; (void)p; (void)data;
  237 }
  238 
  239 
  240 void Sole_Parser_Class::element_open(XML_Parser_Class &c,
  241           XML_Parser &p,
  242           void *data,
  243           const char *name,
  244           XML_Attribute_List &attributes)
  245 {
  246     (void)c; (void)p; (void)attributes;
  247     Parse_State *state = (Parse_State *)data;
  248 
  249     state->depth++;
  250 
  251     if (strcmp(name, "solexml")==0)
  252     {
  253         state->relName=attributes.val("relation");
  254         printf("start solexml relation=%s\n", (const char *)state->relName);
  255         return;
  256     }
  257     else if (strcmp(name, "text-elem")==0)
  258     {
  259         // ignore these
  260         return;
  261     }
  262 
  263     ensure_relation(state);
  264 
  265     if (strcmp(name, "anaphora-elem")==0 
  266         || strcmp(name, "wordlist")==0
  267         || strcmp(name, "w")==0)
  268     {
  269         EST_TList<EST_String> ids;
  270         extract_ids(attributes, ids);
  271 
  272         EST_Litem *idp = ids.head();
  273         bool first=TRUE;
  274         for(; idp!= NULL; idp = idp->next())
  275     {
  276             EST_String id = ids(idp);
  277             if (id==EST_String::Empty)
  278                 XML_Parser_Class::error(c, p, data, EST_String("Element With No Id"));
  279 
  280             if (first)
  281                 first=FALSE;
  282             else
  283         {
  284                 state->current = state->parent;
  285                 state->parent=iup(state->parent);
  286         }
  287         
  288 
  289             EST_Item_Content *cont = get_contents(state, id);
  290 
  291             cont->set_name(id);
  292       
  293         XML_Attribute_List::Entries them;
  294         for(them.begin(attributes); them ; them++)
  295             {
  296         EST_String k = them->k;
  297         EST_String v = them->v;
  298         cont->f.set(k,v);
  299             }
  300 
  301             EST_Item *item;
  302 
  303             if (state->current == NULL)
  304                 if (state->parent == NULL)
  305                     item = state->rel->append();
  306                 else
  307                     item = state->parent->insert_below();
  308             else 
  309                 item = state->current->insert_after();
  310 
  311             item->set_contents(cont);
  312 
  313             state->current=NULL;
  314             state->parent=item;
  315     }
  316     }
  317     else
  318         EST_warning("SOLE XML Parser: unknown element %s", name);
  319 }
  320 
  321 
  322 void Sole_Parser_Class::element(XML_Parser_Class &c,
  323                 XML_Parser &p,
  324                 void *data,
  325                 const char *name,
  326                 XML_Attribute_List &attributes)
  327 {
  328     (void)c; (void)p; (void)attributes;
  329     Parse_State *state = (Parse_State *)data;
  330 
  331     if (strcmp(name, "language")==0)
  332     {
  333         state->utt->f.set("language", attributes.val("name"));
  334         return;
  335     }
  336 
  337     element_open(c, p, data, name, attributes);
  338     element_close(c, p, data, name);
  339 }
  340 
  341 
  342 void Sole_Parser_Class::element_close(XML_Parser_Class &c,
  343            XML_Parser &p,
  344            void *data,
  345            const char *name)
  346 {
  347     (void)c; (void)p; (void)name;
  348     Parse_State *state = (Parse_State *)data;
  349 
  350     if (strcmp(name, "anaphora-elem")==0 
  351         || strcmp(name, "wordlist")==0
  352         || strcmp(name, "w")==0)
  353     {
  354         state->depth--;
  355         state->current = state->parent;
  356         state->parent=iup(state->parent);
  357     }
  358 }
  359 
  360 
  361 void Sole_Parser_Class::pcdata(XML_Parser_Class &c,
  362         XML_Parser &p,
  363         void *data,
  364         const char *chars)
  365 {
  366   (void)c; 
  367   
  368  Parse_State *state = (Parse_State *)data;
  369  
  370  if (state->parent != NULL && p.context(0) == "w")
  371    state->parent->set(EST_String("word"), chars);
  372   
  373   //   printf("SOLE XML Parser [pcdata[%s]] %d\n", chars, state->depth);
  374 }
  375 
  376 
  377 void Sole_Parser_Class::cdata(XML_Parser_Class &c,
  378        XML_Parser &p,
  379        void *data,
  380        const char *chars)
  381 {
  382   (void)c; (void)p; (void)data; (void)chars;
  383   // Parse_State *state = (Parse_State *)data;
  384 
  385   //   printf("SOLE XML Parser [cdata[%s]] %d\n", chars, state->depth);
  386 }
  387 
  388 
  389 void Sole_Parser_Class::processing(XML_Parser_Class &c,
  390         XML_Parser &p,
  391         void *data,
  392         const char *instruction)
  393 {
  394   (void)c; (void)p; 
  395   Parse_State *state = (Parse_State *)data;
  396 
  397   printf("SOLE XML Parser [proc[%s]] %d\n", instruction, state->depth);
  398 }
  399 
  400 
  401 void Sole_Parser_Class::error(XML_Parser_Class &c,
  402        XML_Parser &p,
  403        void *data)
  404 {
  405   (void)c; (void)p;  (void)data;
  406   // Parse_State *state = (Parse_State *)data;
  407 
  408   EST_error("SOLE XML Parser %s", get_error(p));
  409 
  410   est_error_throw();
  411 }