"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/include/EST_Wagon.h" (26 Oct 2017, 12067 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "EST_Wagon.h", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*                Centre for Speech Technology Research                  */
    4 /*                     University of Edinburgh, UK                       */
    5 /*                      Copyright (c) 1996,1997                          */
    6 /*                        All Rights Reserved.                           */
    7 /*                                                                       */
    8 /*  Permission is hereby granted, free of charge, to use and distribute  */
    9 /*  this software and its documentation without restriction, including   */
   10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
   11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
   12 /*  permit persons to whom this work is furnished to do so, subject to   */
   13 /*  the following conditions:                                            */
   14 /*   1. The code must retain the above copyright notice, this list of    */
   15 /*      conditions and the following disclaimer.                         */
   16 /*   2. Any modifications must be clearly marked as such.                */
   17 /*   3. Original authors' names are not deleted.                         */
   18 /*   4. The authors' names are not used to endorse or promote products   */
   19 /*      derived from this software without specific prior written        */
   20 /*      permission.                                                      */
   21 /*                                                                       */
   22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30 /*  THIS SOFTWARE.                                                       */
   31 /*                                                                       */
   32 /*************************************************************************/
   33 /*                     Author :  Alan W Black                            */
   34 /*                     Date   :  May 1996                                */
   35 /*-----------------------------------------------------------------------*/
   36 /*                                                                       */
   37 /* Public declarations for Wagon (CART builder)                          */
   38 /*                                                                       */
   39 /*=======================================================================*/
   40 #ifndef __WAGON_H__
   41 #define __WAGON_H__
   42 
   43 #include "EST_String.h"
   44 #include "EST_Val.h"
   45 #include "EST_TVector.h"
   46 #include "EST_TList.h"
   47 #include "EST_simplestats.h"  /* For EST_SuffStats class */
   48 #include "EST_Track.h"
   49 #include "siod.h"
   50 
   //  Optional multi-threading: when OMP_WAGON is defined as 1, wagon
   //  supports using multiple threads if --omp_nthreads X is given
   //  (works for most gccs).  Left disabled here.
   // #define OMP_WAGON 1
   #ifdef OMP_WAGON
   #include "omp.h"
   #endif

   //  Write WMESS followed by endl to cerr, then call exit(-1).
   //  WMESS is spliced in directly after "cerr <<", so it may itself be
   //  a chain of << insertions; the expansion is a single comma expression.
   #define wagon_error(WMESS) \
       (cerr << WMESS << endl, exit(-1))

   //  Comparisons against HUGE_VAL or FLT_MAX gave floating point
   //  exceptions on Alphas, so wagon defines its own large value instead.
   #define WGN_HUGE_VAL 1.0e20
   63 
   64 class WVector : public EST_FVector
   65 {
   66   public:
   67     WVector(int n) : EST_FVector(n) {}
   68     int get_int_val(int n) const { return (int)a_no_check(n); }
   69     float get_flt_val(int n) const { return a_no_check(n); }
   70     void set_int_val(int n,int i) { a_check(n) = (int)i; }
   71     void set_flt_val(int n,float f) { a_check(n) = f; }
   72 };
   73 
   74 typedef EST_TList<WVector *> WVectorList;
   75 typedef EST_TVector<WVector *> WVectorVector;
   76 
   /* Different types of feature */
   enum wn_dtype {
       /* for predictees and predictors */
       wndt_binary,
       wndt_float,
       wndt_class,
       /* for predictees only */
       wndt_cluster,
       wndt_vector,
       wndt_matrix,
       wndt_trajectory,
       wndt_ols,
       /* for ignored features */
       wndt_ignore
   };
   85 
   86 class WDataSet : public WVectorList {
   87   private:
   88     int dlength;
   89     EST_IVector p_type;
   90     EST_IVector p_ignore;
   91     EST_StrVector p_name;
   92   public:
   93     void load_description(const EST_String& descfname,LISP ignores);
   94     void ignore_non_numbers();
   95 
   96     int ftype(const int &i) const {return p_type(i);}
   97     int ignore(int i) const {return p_ignore(i); }
   98     void set_ignore(int i,int value) { p_ignore[i] = value; }
   99     const EST_String &feat_name(const int &i) const {return p_name(i);}
  100     int samples(void) const {return length();}
  101     int width(void) const {return dlength;}
  102 };    
  /* Operators a WQuestion can apply */
  enum wn_oper {
      wnop_equal,
      wnop_binary,
      wnop_greaterthan,
      wnop_lessthan,
      wnop_is,
      wnop_in,
      wnop_matches
  };
  105 
  106 class WQuestion {
  107   private:
  108     int feature_pos;
  109     wn_oper op;
  110     int yes;
  111     int no;
  112     EST_Val operand1;
  113     EST_IList operandl;
  114     float score;
  115   public:
  116     WQuestion() {;}
  117     WQuestion(const WQuestion &s) 
  118        { feature_pos=s.feature_pos;
  119          op=s.op; yes=s.yes; no=s.no; operand1=s.operand1;
  120      operandl = s.operandl; score=s.score;}
  121     ~WQuestion() {;}
  122     WQuestion(int fp, wn_oper o,EST_Val a)
  123        { feature_pos=fp; op=o; operand1=a; }
  124     void set_fp(const int &fp) {feature_pos=fp;}
  125     void set_oper(const wn_oper &o) {op=o;}
  126     void set_operand1(const EST_Val &a) {operand1 = a;}
  127     void set_yes(const int &y) {yes=y;}
  128     void set_no(const int &n) {no=n;}
  129     int get_yes(void) const {return yes;}
  130     int get_no(void) const {return no;}
  131     const int get_fp(void) const {return feature_pos;}
  132     const wn_oper get_op(void) const {return op;}
  133     const EST_Val get_operand1(void) const {return operand1;}
  134     const EST_IList &get_operandl(void) const {return operandl;}
  135     const float get_score(void) const {return score;}
  136     void set_score(const float &f) {score=f;}
  137     const int ask(const WVector &w) const;
  138     friend ostream& operator<<(ostream& s, const WQuestion &q);
  139 };
  140 
  /* Kinds of impurity a WImpurity can hold; wnim_unset is the initial kind */
  enum wnim_type {
      wnim_unset,
      wnim_float,
      wnim_class,
      wnim_cluster,
      wnim_vector,
      wnim_matrix,
      wnim_ols,
      wnim_trajectory
  };
  144 
  145 //  Impurity measure for cumulating impurities from set of data
  146 class WImpurity {
  147   private:
  148     wnim_type t;
  149     EST_SuffStats a;
  150     EST_DiscreteProbDistribution p;
  151 
  152     float cluster_impurity();
  153     float cluster_member_mean(int i);
  154     float vector_impurity();
  155     float trajectory_impurity();
  156     float ols_impurity();
  157   public:
  158     EST_IList members;            // Maybe there should be a cluster class
  159     EST_FList member_counts;      // AUP: Implement counts for vectors
  160     EST_SuffStats **trajectory;
  161     const WVectorVector *data;          // Needed for ols
  162     float score;
  163     int l,width;
  164 
  165     WImpurity() { t=wnim_unset; a.reset(); trajectory=0; l=0; width=0; data=0;}
  166     ~WImpurity();
  167     WImpurity(const WVectorVector &ds);
  168     void copy(const WImpurity &s) 
  169     {
  170         int i,j; 
  171         t=s.t; a=s.a; p=s.p; members=s.members; member_counts = s.member_counts; l=s.l; width=s.width;
  172         score = s.score;
  173         data = s.data;
  174         if (s.trajectory)
  175         {
  176             trajectory = new EST_SuffStats *[l];
  177             for (i=0; i<l; i++)
  178             {
  179                 trajectory[i] = new EST_SuffStats[width];
  180                 for (j=0; j<width; j++)
  181                     trajectory[i][j] = s.trajectory[i][j];
  182             }
  183         }
  184     }
  185     WImpurity &operator = (const WImpurity &a) { copy(a); return *this; }
  186 
  187     float measure(void);
  188     double samples(void);
  189     wnim_type type(void) const { return t;}
  190     void cumulate(const float pv,double count=1.0);
  191     EST_Val value(void);
  192     EST_DiscreteProbDistribution &pd() { return p; }
  193     float cluster_distance(int i); // distance i from centre in sds
  194     int in_cluster(int i);       // distance i from centre < most remote member
  195     float cluster_ranking(int i);  // position in closeness to centre
  196     friend ostream& operator<<(ostream &s, WImpurity &imp);
  197 };
  198 
  199 class WDlist {
  200   private:
  201     float p_score;
  202     WQuestion p_question;
  203     EST_String p_token;
  204     int p_freq;
  205     int p_samples;
  206     WDlist *next;
  207   public:
  208     WDlist() { next=0; }
  209     ~WDlist() { if (next != 0) delete next; }
  210     void set_score(float s) { p_score = s; }
  211     void set_question(const WQuestion &q) { p_question = q; }
  212     void set_best(const EST_String &t,int freq, int samples)
  213     { p_token = t; p_freq = freq; p_samples = samples;}
  214     float score() const {return p_score;}
  215     const EST_String &token(void) const {return p_token;}
  216     const WQuestion &question() const {return p_question;}
  217     EST_Val predict(const WVector &w);
  218     friend WDlist *add_to_dlist(WDlist *l,WDlist *a);
  219     friend ostream &operator<<(ostream &s, WDlist &d);
  220 };
  221 
  222 class WNode {
  223   private:
  224     WVectorVector data;
  225     WQuestion question;
  226     WImpurity impurity;
  227     WNode *left;
  228     WNode *right;
  229     void print_out(ostream &s, int margin);
  230     int leaf(void) const { return ((left == 0) || (right == 0)); }
  231     int pure(void);
  232   public:
  233     WNode() { left = right = 0; }
  234     ~WNode() { if (left != 0) {delete left; left=0;}
  235            if (right != 0) {delete right; right=0;} }
  236     WVectorVector &get_data(void) { return data; }
  237     void set_subnodes(WNode *l,WNode *r) { left=l; right=r; }
  238     void set_impurity(const WImpurity &imp) {impurity=imp;}
  239     void set_question(const WQuestion &q) {question=q;}
  240     void prune(void);
  241     void held_out_prune(void);
  242     WImpurity &get_impurity(void) {return impurity;}
  243     WQuestion &get_question(void) {return question;}
  244     EST_Val predict(const WVector &w);
  245     WNode *predict_node(const WVector &d);
  246     int samples(void) const { return data.n(); }
  247     friend ostream& operator<<(ostream &s, WNode &n);
  248 };
  249 
  250 extern Discretes wgn_discretes;
  251 extern WDataSet wgn_dataset;
  252 extern WDataSet wgn_test_dataset;
  253 extern EST_FMatrix wgn_DistMatrix;
  254 extern EST_Track wgn_VertexTrack;
  255 extern EST_Track wgn_UnitTrack;
  256 extern EST_Track wgn_VertexFeats;
  257 
  258 void wgn_load_datadescription(EST_String fname,LISP ignores);
  259 void wgn_load_dataset(WDataSet &ds,EST_String fname);
  260 WNode *wgn_build_tree(float &score);
  261 WNode *wgn_build_dlist(float &score,ostream *output);
  262 WNode *wagon_stepwise(float limit);
  263 float wgn_score_question(WQuestion &q, WVectorVector &ds);
  264 void wgn_find_split(WQuestion &q,WVectorVector &ds,
  265         WVectorVector &y,WVectorVector &n);
  266 float summary_results(WNode &tree,ostream *output);
  267 
  268 extern int wgn_min_cluster_size;
  269 extern int wgn_max_questions;
  270 extern int wgn_held_out;
  271 extern float wgn_dropout_feats;
  272 extern float wgn_dropout_samples;
  273 extern int wgn_cos;
  274 extern int wgn_prune;
  275 extern int wgn_quiet;
  276 extern int wgn_verbose;
  277 extern int wgn_predictee;
  278 extern int wgn_count_field;
  279 extern EST_String wgn_count_field_name;
  280 extern EST_String wgn_predictee_name;
  281 extern float wgn_float_range_split;
  282 extern float wgn_balance;
  283 extern EST_String wgn_opt_param;
  284 extern EST_String wgn_vertex_output;
  285 
  286 #define wgn_ques_feature(X) (get_c_string(car(X)))
  287 #define wgn_ques_oper_str(X) (get_c_string(car(cdr(X))))
  288 #define wgn_ques_operand(X) (car(cdr(cdr(X))))
  289 
  290 int wagon_ask_question(LISP question, LISP value);
  291 
  292 int stepwise_ols(const EST_FMatrix &X,
  293          const EST_FMatrix &Y,
  294          const EST_StrList &feat_names,
  295          float limit,
  296          EST_FMatrix &coeffs,
  297          const EST_FMatrix &Xtest,
  298          const EST_FMatrix &Ytest,
  299                  EST_IVector &included,
  300                  float &best_score);
  301 int robust_ols(const EST_FMatrix &X,
  302            const EST_FMatrix &Y, 
  303            EST_IVector &included,
  304            EST_FMatrix &coeffs);
  305 int ols_apply(const EST_FMatrix &samples,
  306           const EST_FMatrix &coeffs,
  307           EST_FMatrix &res);
  308 int ols_test(const EST_FMatrix &real,
  309          const EST_FMatrix &predicted,
  310          float &correlation,
  311          float &rmse);
  312 
  313 #endif /* __WAGON_H__ */