"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/ling_class/relation_io.cc" (11 Sep 2017, 16478 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "relation_io.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*                Centre for Speech Technology Research                  */
    4 /*                     University of Edinburgh, UK                       */
    5 /*                      Copyright (c) 1995,1996                          */
    6 /*                        All Rights Reserved.                           */
    7 /*                                                                       */
    8 /*  Permission is hereby granted, free of charge, to use and distribute  */
    9 /*  this software and its documentation without restriction, including   */
   10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
   12 /*  permit persons to whom this work is furnished to do so, subject to   */
   13 /*  the following conditions:                                            */
   14 /*   1. The code must retain the above copyright notice, this list of    */
   15 /*      conditions and the following disclaimer.                         */
   16 /*   2. Any modifications must be clearly marked as such.                */
   17 /*   3. Original authors' names are not deleted.                         */
   18 /*   4. The authors' names are not used to endorse or promote products   */
   19 /*      derived from this software without specific prior written        */
   20 /*      permission.                                                      */
   21 /*                                                                       */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30 /*  THIS SOFTWARE.                                                       */
   31 /*                                                                       */
   32 /*************************************************************************/
   33 /*                      Author :  Paul Taylor updated by awb             */
   34 /*                      Date   :  Feb 1999                               */
   35 /*-----------------------------------------------------------------------*/
   36 /*                      Relation class file i/o, label files             */
   37 /*                                                                       */
   38 /*=======================================================================*/
   39 #include <cstdlib>
   40 #include <cstdio>
   41 #include <fstream>
   42 #include "EST_unix.h"
   43 #include "EST_types.h"
   44 #include "ling_class/EST_Relation.h"
   45 #include "EST_string_aux.h"
   46 #include "EST_cutils.h"
   47 #include "EST_TList.h"
   48 #include "EST_Option.h"
   49 #include "relation_io.h"
   50 
   51 #define DEF_SAMPLE_RATE 16000   // substituted by load_sample_label() when it is called with sample == 0
   52 #define HTK_UNITS_PER_SECOND 10000000   // factor applied to "end" times (seconds) by the HTK/MLF writers below
   53 
   54 static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");   // matches a string that begins with at least one whitespace character
   55 
      // Forward declaration: load_ogi_label() calls this before its definition
      // further down in this file.
   56 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s, 
   57                    int sample);
   58 
   59 EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
   60 {
   61     ts.set_SingleCharSymbols(";");
   62     ts.set_quotes('"','\\');
   63     EST_String key, val;
   64 
   65     // Skip the header
   66     while (!ts.eof())
   67     {
   68     key = ts.get().string();
   69         if (key == "#")
   70             break;
   71 
   72     val = ts.get_upto_eoln().string();
   73     // delete leading whitespace
   74     if (val.matches(RXleadingwhitespace))
   75         val = val.after(RXwhite);
   76     rel.f.set(key, val);
   77     }
   78         
   79     if (ts.peek() == "") return format_ok;
   80 
   81     while (!ts.eof())
   82     {
   83     EST_Item *si = rel.append();
   84     EST_String name;
   85     
   86     si->set("end",(float)atof(ts.get().string()));
   87     ts.get();  // skip the color;
   88 
   89     for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
   90     {
   91         EST_Token &t = ts.get();
   92         if (name.length() > 0)  // preserve internal whitespace
   93         name += t.whitespace();  
   94         name += t.string();
   95     }
   96     si->set_name(name);
   97     
   98     if (ts.peek().string() == ";") // absorb separator
   99     {
  100         ts.get();
  101         si->features().load(ts);
  102     }
  103     }
  104     return format_ok;
  105 }
  106 
  107 EST_write_status save_esps_label(const EST_String &filename, 
  108                  const EST_Relation &s,
  109                  bool evaluate_ff)
  110 {
  111     ostream *outf;
  112     if (filename == "-")
  113     outf = &cout;
  114     else
  115     outf = new ofstream(filename);
  116     
  117     if (!(*outf))  
  118     {
  119     cerr << "save_esps_label: can't open label output file \"" << 
  120         filename << "\"" << endl;
  121     return write_fail;
  122     }
  123 
  124     EST_write_status st=save_esps_label(outf, s, evaluate_ff);
  125   
  126     if (outf != &cout)
  127     delete outf;
  128 
  129     return st;
  130 }
  131 
  132 EST_write_status save_esps_label(ostream *outf,
  133                  const EST_Relation &s,
  134                  bool evaluate_ff)
  135 {
  136     EST_Item *ptr;
  137     
  138     *outf << "separator ;\n";
  139     if (!s.f.present("nfields"))
  140     *outf << "nfields 1\n";
  141 
  142     EST_Features::Entries p;
  143     for (p.begin(s.f); p; ++p)
  144     *outf << p->k << " " << p->v << endl;
  145 
  146     *outf << "#\n";
  147 /*    if (f("timing_style") == "event")
  148         *outf << "timing_style event\n";
  149     else if (f("timing_style") == "unit")
  150         *outf << "timing_style unit\n";
  151 */
  152     
  153     for (ptr = s.head(); ptr != 0; ptr = inext(ptr))
  154     {
  155     *outf << "\t";
  156     outf->precision(5);
  157     outf->setf(ios::scientific, ios::floatfield);
  158     outf->width(8);
  159     //  outf->fill('0');
  160     if (s.f("timing_style","0") == "event")
  161         *outf << ptr->F("time",0);
  162     else
  163         *outf << ptr->F("end",0);
  164     
  165     *outf << " 26 \t" << ptr->S("name","0");
  166 
  167     EST_Features f2;
  168     f2 = ptr->features();
  169     f2.remove("name");
  170     f2.remove("end");
  171     if (evaluate_ff)
  172         evaluate(ptr,f2);
  173 
  174     if (f2.length() > 0)
  175     {
  176         *outf << " ; ";
  177         f2.save(*outf);
  178     }
  179     *outf << endl;
  180     }
  181     
  182     return write_ok;
  183 }
  184 
  185 EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
  186 {
  187     // This function reads OGI style label files. The start, end
  188     // time and names of the labels are mandatory. 
  189     EST_String key, val;
  190     float sr;
  191     int isr;
  192     
  193     // set up the character constant values for this stream
  194     ts.set_SingleCharSymbols(";");
  195 
  196     // Skip over header
  197 
  198     while(!ts.eof())
  199       {
  200     if ((ts.peek().col() == 0) && (ts.peek() == "END"))
  201       {
  202         if (ts.peek() == "END")
  203           { // read rest of header
  204         ts.get();
  205         ts.get();
  206         ts.get();
  207           }
  208         break;
  209       }
  210     key = ts.get().string();
  211     val = ts.get().string();
  212       }
  213 
  214     sr = 1000.0 / atof(val);
  215     isr = (int)sr;
  216     
  217     if (ts.eof())
  218     {
  219     cerr << "Error: couldn't find header in label file " 
  220          << ts.filename() << endl;
  221     return wrong_format;
  222     }
  223 
  224     if (read_label_portion(ts, s, isr) == misc_read_error)
  225     {
  226     cerr << "error: in label file " << ts.filename() << " at line " <<
  227         ts.linenum() << endl;
  228     return misc_read_error;
  229     }
  230     return format_ok;
  231 }
  232 
  233 EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
  234 {
  235     // This function reads label files in the form of simple word strings 
  236     // with no timing information.
  237     EST_Item *item;
  238 
  239     while (!ts.eof())
  240     {
  241     item = s.append();
  242     item->set("name",(EST_String)ts.get());
  243     item->set("end",0.0);
  244     }
  245 
  246     return format_ok;
  247 }
  248 
  249 static float convert_long_num_string_to_time(const char *s,int sample)
  250 {
  251     // For those label files that think 100 nanosecond times are cool
  252     // we have to provide a special function to convert them as 
  253     // this quickly gets beyond the capabilities of ints.
  254 
  255     if (strlen(s) < 15)
  256     return atof(s)/sample;
  257     else
  258     {
  259     double a = 0,d;
  260     int i=0;
  261     for (i=0; 
  262          (strchr(" \n\r\t",s[i]) != NULL) && (s[i] != '\0');
  263          i++);
  264 
  265     for ( ;
  266           (s[i] != '\0') && (s[i] >= '0') && (s[i] <= '9');
  267           i++)
  268     {
  269         a = a*10;
  270         d = s[i]-'0';
  271         a += (d/(double)sample);
  272     }
  273     return a;
  274     }
  275 }
  276 
  277 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s, 
  278                    int sample)
  279 {
  280     EST_Item *item;
  281     float hend;
  282     EST_String str;
  283     
  284     while(!ts.eof())
  285     {
  286     str = ts.get().string();
  287     if (str == ".")
  288         return format_ok;
  289     
  290     item = s.append();
  291     
  292     str = ts.get().string();
  293     hend = convert_long_num_string_to_time(str,sample);
  294     
  295     item->set("end",hend);                   // time 
  296     item->set("name",ts.get().string());     // name
  297     
  298     if (!ts.eoln())
  299         item->set("rest_lab",ts.get_upto_eoln().string());
  300     }
  301     
  302     return format_ok;
  303 }    
  304 
  305 EST_read_status load_sample_label(EST_TokenStream &ts,
  306                   EST_Relation &s, int sample)
  307 {
  308     
  309     if (sample == 0)    // maybe this should be an error
  310     sample = DEF_SAMPLE_RATE;
  311     
  312     // set up the character constant values for this stream
  313     ts.set_SingleCharSymbols(";");
  314     
  315     s.clear();
  316     if (read_label_portion(ts, s, sample) == misc_read_error)
  317     {
  318     cerr << "error: in label file " << ts.filename() << " at line " <<
  319         ts.linenum() << endl;
  320     return misc_read_error;
  321     }
  322     return format_ok;
  323 }
  324 
  325 EST_write_status save_htk_label(const EST_String &filename, 
  326                 const EST_Relation &a)
  327 {
  328     ostream *outf;
  329     if (filename == "-")
  330     outf = &cout;
  331     else
  332     outf = new ofstream(filename);
  333     
  334     if (!(*outf))
  335     {
  336     cerr << "save_htk_label: can't open label output file \"" << 
  337         filename << "\"" << endl;
  338     return write_fail;
  339     }
  340 
  341     EST_write_status s = save_htk_label(outf, a);
  342 
  343     
  344     if (outf != &cout)
  345     delete outf;
  346     
  347     return s;
  348 }
  349 
  350 EST_write_status save_htk_label(ostream *outf,
  351                 const EST_Relation &a)
  352 {
  353     EST_Item *ptr;
  354     float end,start;
  355     
  356     outf->precision(6);
  357 
  358     start = end = 0;
  359     for (ptr = a.head(); ptr != 0; ptr = inext(ptr))
  360     {
  361     outf->width(15);
  362     cout.setf(ios::left,ios::adjustfield);
  363     *outf << (int)(start * HTK_UNITS_PER_SECOND);
  364     outf->width(15);
  365     end = ptr->F("end",0.0);
  366     *outf << (int)(end * HTK_UNITS_PER_SECOND);
  367     *outf << " " << ptr->name() << endl;
  368     start = end;
  369     }
  370 
  371     return write_ok;
  372 }
  373 
  374 #if 0
      // Everything up to the matching #endif is excluded from compilation.
      // The two functions below still refer to EST_Stream_Item and
      // ptr->dur(), which are not used anywhere else in this file.

      // save_label_spn: write each item of `a` to `filename` ("-" selects
      // cout) as its name followed by dur() * 1000.0.  The first line
      // written carries the suffix "\t (0,140)" and the last one
      // "\t (99,80)".  Returns write_fail if the stream cannot be used,
      // write_ok otherwise.
      // NOTE(review): a.head() is dereferenced without a null check, and
      // the stream allocated with new is not deleted on the failure path.
  375 EST_write_status save_label_spn(const EST_String &filename, 
  376                 const EST_Relation &a)
  377 {
  378     EST_Stream_Item *ptr;
  379     
  380     ostream *outf;
  381     if (filename == "-")
  382     outf = &cout;
  383     else
  384     outf = new ofstream(filename);
  385     
  386     if (!(*outf))
  387     {
  388     cerr << "save_label_spn: can't open label output file \"" 
  389         << filename << "\"" << endl;
  390     return write_fail;
  391     }
  392     
      // First line: head item, tagged "(0,140)".
  393     ptr = a.head();
  394     outf->precision(3);
  395     outf->setf(ios::left, ios::adjustfield);
  396     outf->width(8);
  397     *outf << ptr->name();
  398     outf->setf(ios::scientific, ios::floatfield);
  399     outf->width(8);
  400     *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;
  401     
      // NOTE(review): ptr still points at the head here, so the head item
      // is written a second time by the first pass of this loop.  The loop
      // ends with ptr on the last item, which is written after the loop.
  402     for (; inext(ptr) != 0; ptr = inext(ptr))
  403     {
  404     outf->precision(3);
  405     outf->setf(ios::left, ios::adjustfield);
  406     outf->width(8);
  407     *outf << ptr->name();
  408     outf->setf(ios::scientific, ios::floatfield);
  409     outf->width(8);
  410     *outf << (ptr->dur() * 1000.0) << endl;
  411     }
  412     //    outf->precision(3);
  413     //    outf->setf(ios::left, ios::adjustfield);
      // Last line: final item, tagged "(99,80)".
  414     outf->width(8);
  415     *outf << ptr->name();
  416     outf->setf(ios::scientific, ios::floatfield);
  417     outf->width(8);
  418     *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;
  419     
  420     if (outf != &cout)
  421     delete outf;
  422     
  423     return write_ok;
  424 }
  425 
      // save_label_names: write only the item names of `a` to `filename`
      // ("-" selects cout).  Names are separated by a newline when
      // `features` is neither empty nor "OneLine", otherwise by a single
      // space; the last name is always followed by a newline.  Returns
      // misc_write_error if the stream cannot be used, write_ok otherwise.
  426 EST_write_status save_label_names(const EST_String &filename, 
  427                   const EST_Relation &a, 
  428                   const EST_String &features)
  429 {
  430     EST_Stream_Item *ptr;
  431     
  432     ostream *outf;
  433     if (filename == "-")
  434     outf = &cout;
  435     else
  436     outf = new ofstream(filename);
  437     
  438     if (!(*outf))  
  439     {
  440     cerr << "save_label_name: can't open label output file \"" 
  441         << filename << "\"" << endl;
  442     return misc_write_error;
  443     }
  444     
      // Every item except the last, each followed by the chosen separator.
  445     for (ptr = a.head(); inext(ptr) != 0; ptr = inext(ptr))
  446     {
  447     *outf << ptr->name();
  448     if ((features != "") && (features != "OneLine"))
  449         *outf << endl;
  450     else
  451         *outf << " ";
  452     }
  453     
      // Last item.  NOTE(review): ptr is not checked for null here.
  454     *outf << ptr->name() << endl;
  455     
  456     if (outf != &cout)
  457     delete outf;
  458     return write_ok;
  459 }
  460 #endif
  461 
  462 EST_write_status save_RelationList(const EST_String &filename, 
  463                    const EST_RelationList &plist, 
  464                    int time, int path)
  465 {
  466     EST_Litem *p;
  467     EST_Item *ptr;
  468     EST_String outname;
  469     float start,end;
  470     
  471     ostream *outf;
  472     if (filename == "-")
  473     outf = &cout;
  474     else
  475     outf = new ofstream(filename);
  476     
  477     if (!(*outf))
  478     {
  479     cerr << "save_StreamList: can't open MLF output file \"" 
  480         << filename << "\"\n";
  481     return write_fail;
  482     }
  483     
  484     *outf << "#!MLF!#\n";   // MLF header/identifier
  485     outf->precision(6);
  486 
  487     start = end = 0;
  488     for (p = plist.head(); p != 0; p = p->next())
  489     {
  490     outname = path ? plist(p).name() : basename(plist(p).name());
  491     *outf << "\"*/" << outname<<"\"\n";
  492     for (ptr = plist(p).head(); ptr != 0; ptr = inext(ptr))
  493     {
  494         if (time)
  495         {
  496         outf->width(15);
  497         cout.setf(ios::left,ios::adjustfield);
  498         *outf << (int)(start * HTK_UNITS_PER_SECOND);
  499         outf->width(15);
  500         end = ptr->F("end",0.0);
  501         *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
  502         start = end;
  503         }
  504         *outf << ptr->S("name","0") << endl;
  505     }
  506     *outf << ".\n";
  507     }
  508     
  509     if (outf != &cout)
  510     delete outf;
  511     return write_ok;
  512 }    
  513 
  514 EST_write_status save_WordList(const EST_String &filename, 
  515                    const EST_RelationList &plist, 
  516                    int style)
  517 {
  518     EST_Litem *p;
  519     EST_Item *ptr;
  520     
  521     ostream *outf;
  522     if (filename == "-")
  523     outf = &cout;
  524     else
  525     outf = new ofstream(filename);
  526     
  527     if (!(*outf))
  528     {
  529     cerr << "save:WordList: can't open WordList output file \"" 
  530         << filename << "\"\n";
  531     return write_fail;
  532     }
  533     
  534     for (p = plist.head(); p != 0; p = p->next())
  535     {
  536     for (ptr = plist(p).head(); inext(ptr) != 0; ptr = inext(ptr))
  537     {
  538         *outf << ptr->name();
  539         if (style == 0)
  540         *outf << endl;
  541         else
  542         *outf << " ";
  543     }
  544     if (ptr != 0)
  545         *outf << ptr->name() << endl;
  546     }
  547     
  548     if (outf != &cout)
  549     delete outf;
  550     return write_ok;
  551 }    
  552 
  553 EST_write_status save_ind_RelationList(const EST_String &filename, 
  554                        const EST_RelationList &plist, 
  555                        const EST_String &features, 
  556                        int path)
  557 {
  558     EST_Litem *p;
  559     EST_String outname;
  560     (void) filename;
  561     (void) features;
  562     
  563     for (p = plist.head(); p != 0; p = p->next())
  564     {
  565     outname = path ? plist(p).name() : basename(plist(p).name());
  566     if (plist(p).save(outname,false) != write_ok)
  567         return misc_write_error;
  568     }
  569     
  570     return write_ok;
  571 }    
  572 
  573 EST_read_status load_RelationList(const EST_String &filename, 
  574                   EST_RelationList &plist)
  575 {
  576     EST_TokenStream ts;
  577     EST_String fns, name;
  578     
  579     if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
  580     {
  581     cerr << "Can't open label input file " << filename << endl;
  582     return misc_read_error;
  583     }
  584     // set up the character constant values for this stream
  585     ts.set_SingleCharSymbols(";");
  586     
  587     // Skip over header
  588     if (ts.get().string() != "#!MLF!#")
  589     {
  590     cerr << "Not MLF file\n";
  591     return wrong_format;
  592     }
  593 
  594     while(!ts.eof())
  595     {
  596     // put filename in as stream name. The filename is usually surrounded
  597     // by quotes, so remove these.
  598     fns = ts.get().string();
  599     strip_quotes(fns);
  600     EST_Relation s(fns);
  601     s.f.set("name", fns); // simonk
  602     plist.append(s);
  603 
  604         if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
  605     {
  606         cerr << "error: in reading MLF file\n";
  607         cerr << "section for file " << fns << 
  608         " at line " << ts.linenum() << " is badly formatted\n";
  609         
  610         return misc_read_error;
  611     }
  612     }
  613 
  614     return format_ok;
  615 }
  616 
  617 static void pad_ends(EST_Relation &s, float length)
  618 {
  619     // add evenly spaced dummy end values to Relation
  620     EST_Item *p;
  621     int i;
  622     
  623     for (i = 0, p = s.head(); p; p = inext(p), ++i)
  624     p->set("end",(length * float(i)/float(s.length())));
  625 }
  626 
  627 EST_read_status read_RelationList(EST_RelationList &plist, 
  628                   EST_StrList &files, EST_Option &al)
  629 {
  630     EST_Litem *p, *plp;
  631     
  632     if (al.val("-itype", 0) == "mlf")
  633     {
  634     if (load_RelationList(files.first(), plist) != format_ok)
  635         exit (-1);
  636     }
  637     else
  638     for (p = files.head(); p; p = p->next())
  639     {
  640         EST_Relation s(files(p));
  641         plist.append(s);
  642         plp = plist.tail();
  643         if (al.present("-itype"))
  644         {
  645         if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
  646             exit (-1);
  647         }
  648         else if (plist(plp).load(files(p)) != format_ok)
  649         exit (-1);
  650         if ((al.val("-itype", 0) == "words") && (al.present("-length")))
  651         pad_ends(s, al.fval("-length"));
  652         
  653     }
  654     
  655     return format_ok;
  656 }