"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/base_class/EST_FeatureData.cc" (11 Sep 2017, 13777 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "EST_FeatureData.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /************************************************************************/
    2 /*                                                                      */
    3 /*                Centre for Speech Technology Research                 */
    4 /*                     University of Edinburgh, UK                      */
    5 /*                       Copyright (c) 1996,1997                        */
    6 /*                        All Rights Reserved.                          */
    7 /*                                                                      */
    8 /*  Permission is hereby granted, free of charge, to use and distribute */
    9 /*  this software and its documentation without restriction, including  */
   10 /*  without limitation the rights to use, copy, modify, merge, publish, */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to     */
   12 /*  permit persons to whom this work is furnished to do so, subject to  */
   13 /*  the following conditions:                                           */
   14 /*   1. The code must retain the above copyright notice, this list of   */
   15 /*      conditions and the following disclaimer.                        */
   16 /*   2. Any modifications must be clearly marked as such.               */
   17 /*   3. Original authors' names are not deleted.                        */
   18 /*   4. The authors' names are not used to endorse or promote products  */
   19 /*      derived from this software without specific prior written       */
   20 /*      permission.                                                     */
   21 /*                                                                      */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
   30 /*  THIS SOFTWARE.                                                      */
   31 /*                                                                      */
   32 /************************************************************************/
   33 /*                                                                      */
   34 /*                   Author: Paul Taylor Caley                          */
   35 /*                       Date: July 1998                                */
   36 /* -------------------------------------------------------------------- */
   37 /*                     Feature Data Class                               */
   38 /*                                                                      */
   39 /************************************************************************/
   40 
   41 #include "EST_TMatrix.h"
   42 #include "EST_Val.h"
   43 #include "EST_FeatureData.h"
   44 #include "EST_string_aux.h"
   45 #include "EST_Token.h"
   46 #include "EST_FileType.h"
   47 #include "EST_error.h"
   48 #include <iostream>
   49 #include <fstream>
   50 
   51 #include "EST_THash.h"
   52 
   53 
   54 EST_FeatureData::EST_FeatureData()
   55 {
   56     default_vals();
   57 }
   58 
   59 
   60 
   61 EST_FeatureData::EST_FeatureData(const EST_FeatureData &a)
   62 { 
   63     default_vals();
   64     copy(a);
   65 }
   66 
   67 EST_FeatureData::~EST_FeatureData(void)
   68 {
   69 }
   70 
   71 int EST_FeatureData::num_samples() const
   72 {
   73     return fd.num_rows();
   74 }
   75 
   76 int EST_FeatureData::num_features() const
   77 {
   78     return fd.num_columns();
   79 }
   80 
   81 
   82 void EST_FeatureData::default_vals()
   83 {
   84 /*    cout << "Default values\n";
   85     p_sub_fd = false;
   86     p_info = new EST_FeatureInfo;
   87 */
   88 }
   89 
   90 void EST_FeatureData::set_num_samples(int num_samples, bool preserve)
   91 {
   92     fd.resize(num_samples, fd.num_columns(), preserve);
   93 }
   94 
   95 void EST_FeatureData::resize(int num_samples, int num_features, bool preserve)
   96 {
   97     // If enlargement is required, give new features dummy names
   98     // and set their types to <STRING>. If preserve is set to 0
   99     // rename all features this way.
  100 
  101     if (num_features > fd.num_columns())
  102     {
  103     int i;
  104     if (preserve)
  105         i = fd.num_columns();
  106     else
  107         i = 0;
  108     for (; i < num_features; ++i)
  109         info.set("unnamed_" + itoString(i), "<STRING>");
  110     }
  111 
  112     fd.resize(num_samples, num_features, preserve);
  113 }
  114 
  115 void EST_FeatureData::resize(int num_samples, EST_Features &f, bool preserve)
  116 {
  117     fd.resize(num_samples, f.length(), preserve);
  118     info = f;
  119 }
  120 
  121 EST_String EST_FeatureData::type(const EST_String &feature_name)
  122 {
  123     EST_String t = info.S(feature_name);
  124     
  125     if (t.contains("<", 0)) // i.e. a predefined type
  126     return t;
  127 
  128     return "undef";
  129 }
  130 
  131 EST_StrList EST_FeatureData::values(const EST_String &feature_name)
  132 {
  133     EST_StrList v;
  134     EST_String t = info.S(feature_name);
  135     
  136     // check for infinite set:
  137     if ((t == "<FLOAT>") || (t == "<INT>") || (t == "<STRING>"))
  138     return v; 
  139 
  140     StringtoStrList(t, v);
  141     return v;
  142 }
  143 
  144 int EST_FeatureData::feature_position(const EST_String &feature_name)
  145 {
  146     int i;
  147 
  148     EST_Features::Entries p;
  149 
  150     for (i = 0, p.begin(info); p; ++p, ++i)
  151     {
  152 //  cout << "looking at " << info.fname(p) << endl;
  153 //  cout << "i = " << i << endl;
  154     if (p->k == feature_name)
  155         return i;
  156     }
  157 
  158     EST_error("No such feature %s\n", (const char *) feature_name);
  159     return 0;
  160 }
  161 
  162 int EST_FeatureData::update_values(const EST_String &feature_name, int max)
  163 {
  164     // This should be converted back to Hash tables once extra
  165     // iteration functions are added the EST_Hash.
  166     int i, col;
  167     EST_Features values;
  168     EST_String v;
  169 
  170 //    EST_TStringHash<int> values(max);
  171 
  172     col = feature_position(feature_name);
  173 
  174     for (i = 0; i < num_samples(); ++i)
  175     values.set(fd.a(i, col).string(), 1);
  176 
  177     // check to see if there are more types than allowed, if so
  178     // just set to open set STRING
  179     if (values.length() > max)
  180     v = "<STRING>"; 
  181     else
  182       {
  183     EST_Features::Entries p;
  184     for(p.begin(values); p; ++p)
  185         v += p->k + " ";
  186       }
  187 
  188     info.set(feature_name, v);
  189     
  190     return values.length();
  191 }
  192 
  193 EST_FeatureData & EST_FeatureData::copy(const EST_FeatureData &a)
  194 {
  195     (void) a;
  196 /*    // copy on a sub can't alter header information
  197     if (!p_sub_fd)
  198     {
  199     delete p_info;
  200     *p_info = *(a.p_info);
  201     }
  202     // but data can be copied so long as no resizing is involved.
  203     EST_ValMatrix::operator=(a);
  204 */
  205     return *this;
  206 }
  207 
  208 /*void EST_FeatureData::a(int i, int j)
  209 {
  210     return EST_ValMatrix::a(i, j);
  211 }
  212 */
  213 /*
  214 EST_Val &EST_FeatureData::operator()(int i, int j)
  215 {
  216     return a(i, j);
  217 }
  218 
  219 EST_Val &EST_FeatureData::operator()(int s, const EST_String &f)
  220 {
  221     int i = info().field_index(f);
  222     return a(s, i);
  223 }
  224 
  225 EST_FeatureData &EST_FeatureData::operator=(const EST_FeatureData &f)
  226 {
  227     return copy(f);
  228 }
  229 
  230 */
  231 EST_Val &EST_FeatureData::a(int i, const EST_String &f)
  232 {
  233   (void)f;
  234   return fd.a(i, 0);
  235 }
  236 
  237 EST_Val &EST_FeatureData::a(int i, int j)
  238 {
  239     return fd.a(i, j);
  240 }
  241 const EST_Val &EST_FeatureData::a(int i, const EST_String &f) const
  242 {
  243   (void)f;
  244     return fd.a(i, 0);
  245 }
  246 
  247 const EST_Val &EST_FeatureData::a(int i, int j) const
  248 {
  249     return fd.a(i, j);
  250 }
  251 
  252 
  253 /*
  254 void EST_FeatureData::sub_samples(EST_FeatureData &f, int start, int num)
  255 {
  256     sub_matrix(f, start, num);
  257     f.p_info = p_info;
  258     f.p_sub_fd = true;
  259 }
  260 
  261 void EST_FeatureData::extract_named_fields(const EST_String &fields)
  262 {
  263     EST_FeatureData n;
  264     // there must be a more efficient way than a copy?
  265     extract_named_fields(n, fields);
  266     *this = n;
  267 }
  268 
  269 void EST_FeatureData::extract_named_fields(const EST_StrList &fields)
  270 {
  271     EST_FeatureData n;
  272     // there must be a more efficient way than a copy?
  273     extract_named_fields(n, fields);
  274     *this = n;
  275 }
  276 
  277 void EST_FeatureData::extract_numbered_fields(const EST_String &fields)
  278 {
  279     EST_FeatureData n;
  280     // there must be a more efficient way than a copy?
  281     extract_numbered_fields(n, fields);
  282     *this = n;
  283 }
  284 
  285 void EST_FeatureData::extract_numbered_fields(const EST_IList &fields)
  286 {
  287     EST_FeatureData n;
  288     // there must be a more efficient way than a copy?
  289     extract_numbered_fields(n, fields);
  290     *this = n;
  291 }
  292 
  293 
  294 void EST_FeatureData::extract_named_fields(EST_FeatureData &f, 
  295                        const EST_String &fields) const
  296 {
  297     EST_StrList s;
  298 
  299     StringtoStrList(fields, s);
  300     extract_named_fields(f, s);
  301 }
  302 void EST_FeatureData::extract_named_fields(EST_FeatureData &f, 
  303                        const EST_StrList &n_fields) const
  304 {
  305     EST_Litem *p;
  306     EST_StrList n_types;
  307     int i, j;
  308 
  309     info().extract_named_fields(*(f.p_info), n_fields);
  310 
  311     for (p = n_fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
  312     for (j = 0; j < f.num_samples(); ++j)
  313         f(j, i) = a(j, n_fields(p));
  314 
  315 }
  316 
  317 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f, 
  318                           const EST_IList &fields) const
  319 {
  320     EST_Litem *p;
  321     EST_StrList n_fields;
  322     int i, j;
  323 
  324     for (p = fields.head(); p; p = p->next())
  325     n_fields.append(info().field_name(fields(p)));
  326     
  327     info().extract_named_fields(*(f.p_info), n_fields);
  328 
  329     for (p = fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
  330     for (j = 0; j < f.num_samples(); ++j)
  331         f(j, i) = a(j, fields(p));
  332 
  333 }
  334 
  335 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f, 
  336                           const EST_String &fields) const
  337 {
  338     EST_StrList s;
  339     EST_IList il;
  340 
  341     StringtoStrList(fields, s);
  342     StrListtoIList(s, il);
  343     extract_numbered_fields(f, il);
  344 }
  345 */
  346 
  347 EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)
  348 {
  349   (void)f;
  350   (void)filename;
  351 /*    
  352     ostream *outf;
  353     EST_Litem *s, *e;
  354     int i;
  355     if (filename == "-")
  356     outf = &cout;
  357     else
  358     outf = new ofstream(filename);
  359     
  360     if (!(*outf))
  361     return write_fail;
  362     
  363     outf->precision(5);
  364     outf->setf(ios::scientific, ios::floatfield);
  365     outf->width(8);
  366     
  367     *outf << "EST_File feature_data\n"; // EST header identifier
  368     *outf << "DataType ascii\n";
  369     *outf << "NumSamples " << f.num_samples() << endl;
  370     *outf << "NumFields " << f.num_fields() << endl;
  371     *outf << "FieldNames " << f.info().field_names();
  372     *outf << "FieldTypes " << f.info().field_types();
  373     if (f.info().group_start.length() > 0)
  374     for (s = f.info().group_start.head(), e = f.info().group_end.head(); 
  375          s; s = s->next(), e = e->next())
  376         *outf << "Group " << f.info().group_start.key(s) << " " << 
  377         f.info().group_start.val(s) << " " << f.info().group_end.val(e) << endl;
  378 
  379     for (i = 0; i < f.num_fields(); ++i)
  380     if (f.info().field_values(i).length() > 0)
  381         *outf << "Field_" << i << "_Values " 
  382         << f.info().field_values(i) << endl;
  383 
  384     *outf << "EST_Header_End\n"; // EST end of header identifier
  385 
  386 //    *outf << ((EST_ValMatrix ) f);
  387     *outf << f;
  388     */
  389 
  390     return write_ok;
  391 }
  392 
  393 
  394 EST_write_status EST_FeatureData::save(const EST_String &filename, 
  395                        const EST_String &file_type) const
  396 {
  397     if ((file_type == "est") || (file_type == ""))
  398     return save_est(*this, filename);
  399 /*    else if (file_type = "octave")
  400     return save_octave(*this, filename);
  401     else if (file_type = "ascii")
  402     return save_ascii(*this, filename);
  403 */
  404 
  405     cerr << "Can't save feature data in format \"" << file_type << endl;
  406     return write_fail;
  407 }
  408 
  409 
  410 
  411 EST_read_status EST_FeatureData::load(const EST_String &filename)
  412 {
  413     int i, j;
  414     EST_Option hinfo;
  415     EST_String k, v;
  416     EST_read_status r;
  417     bool ascii;
  418     EST_TokenStream ts;
  419     EST_EstFileType t;
  420     int ns, nf;
  421 
  422     if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
  423     {
  424     cerr << "Can't open track file " << filename << endl;
  425     return misc_read_error;
  426     }
  427     // set up the character constant values for this stream
  428     ts.set_SingleCharSymbols(";");
  429     ts.set_quotes('"','\\');
  430 
  431     if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
  432     {
  433     cerr << "Error reading est header of file " << filename << endl;
  434     return r;
  435     }
  436 
  437     if (t != est_file_feature_data)
  438     {
  439     cerr << "Not a EST Feature Data file: " << filename << endl;
  440     return misc_read_error;
  441     }
  442 
  443     ns = hinfo.ival("NumSamples");
  444     nf = hinfo.ival("NumFeatures");
  445     
  446     cout << "ns: " << ns << endl;
  447     cout << "nf: " << nf << endl;
  448     resize(ns, nf);
  449 
  450     info.clear(); // because resize will make default names
  451 
  452     for (i = 0; i < nf; ++i)
  453     {
  454     k = "Feature_" + itoString(i+1);
  455     if (hinfo.present(k))
  456     {
  457         v = hinfo.val(k);
  458         info.set(v.before(" "), v.after(" "));
  459         cout << "value: " << v.after(" ") << endl;
  460     }
  461     else
  462         EST_error("No feature definition given for feature %d\n", i);
  463     }
  464 
  465     for (i = 0; i < ns; ++i)
  466       {
  467     EST_Features::Entries p;
  468     for (p.begin(info), j = 0; j < nf; ++j, ++p)
  469     {
  470         if (p->k == "<FLOAT>")
  471           a(i, j) = atof(ts.get().string());
  472         else if (p->k == "<BOOL>")
  473         a(i, j) = atoi(ts.get().string());
  474         else if (p->k == "<INT>")
  475         a(i, j) = atoi(ts.get().string());
  476         else
  477         a(i, j) = ts.get().string();
  478     }
  479       }
  480 
  481     return format_ok;
  482 }
  483 
  484 /*ostream& operator << (ostream &st, const EST_FeatureInfo &a)
  485 {   
  486 
  487 //    st << a.field_names() << endl;
  488 //    st << a.field_types() << endl;
  489 
  490     return st;
  491 }
  492 */
  493 
  494 ostream& operator << (ostream &st, const EST_FeatureData &d)
  495 {   
  496     int i, j;
  497     EST_String t;
  498     EST_Val v;
  499 
  500 //    st << a;
  501 
  502 //    EST_ValMatrix::operator<<(st, (EST_ValMatrix)a);
  503 
  504     for (i = 0; i < d.num_samples(); ++i)
  505     {
  506     for (j = 0; j < d.num_features(); ++j)
  507     {
  508         v =  d.a(i, j);
  509         st << v  << " ";
  510 //      cout << "field type " << a.info().field_type(j) << endl;
  511 /*      else if (a.info().field_type(j) == "float")
  512         st << a.a(i, j);
  513         else if (a.info().field_type(j) == "int")
  514         st << a.a(i, j);
  515 
  516         else if (a.info().field_type(j) == "string")
  517         {
  518         //      st << "\"" << a.a(i, j) << "\"";
  519         t = a.a(i, j);
  520         t.gsub(" ", "_");
  521         st << t;
  522         }
  523 */
  524     }
  525     st << endl;
  526     }
  527 
  528     return st;
  529 }