"Fossies" - the Fresh Open Source Software Archive

Member "augustus-3.3.3/include/statemodel.hh" (22 May 2019, 8618 Bytes) of package /linux/misc/augustus-3.3.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "statemodel.hh" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.3.2_vs_3.3.3.

    1 /*
    2  * statemodel.hh
    3  *
    4  * License: Artistic License, see file LICENSE.TXT or 
    5  *          https://opensource.org/licenses/artistic-license-1.0
    6  */
    7 
    8 #ifndef _STATEMODEL_HH
    9 #define _STATEMODEL_HH
   10 
   11 // project includes
   12 #include "matrix.hh"
   13 #include "vitmatrix.hh"
   14 #include "merkmal.hh"
   15 
   16 
   17 
   18 /*
   19  * constants used in splitting into state/substate pairs
   20  * we use the lower byte for the state, and the upper 3 for the substate
   21  * SHIFT_LEFT is used to build the full id from the substate id
   22  * SHIFT_RIGHT is used to retrieve back the substate id
   23  */
   24 #define SHIFT_SIZE (sizeof (signed char)*8)
   25 #define SHIFT_LEFT(x) ((x) << SHIFT_SIZE)
   26 #define SHIFT_RIGHT(x) ((x) >> SHIFT_SIZE)
   27 #define MAX_STATECOUNT SHIFT_LEFT(1)
   28 
   29 
   30 /*
   31  * getFullStateId:  merge state/substate pairs into single integers (used in calls of viterbi)
   32  *                  precondition: state >= -1, substate >= -1
   33  *                  if substate is -1 (none), state remains unchanged
   34  * getStatePair:    split integers back into state/substate pairs
   35  *
   36  */
   37 inline int getFullStateId(int state, SubstateId substate = SubstateId()) { 
   38     return SHIFT_LEFT (substate.fullId()) + state;
   39 }
   40 inline void getStatePair(int fullState, int& state, SubstateId& substate) {
   41     state = (signed char)(fullState);
   42     substate.set(SHIFT_RIGHT(fullState -state));
   43 }
   44 
   45 /**
   46  * @brief Predecessor in the state transition Graph
   47  * 
   48  * @author Emmanouil Stafilarakis
   49  * @author Mario Stanke
   50  */
   51 struct Ancestor{
   52     Ancestor(int newpos=0, Double newval=0.0) : pos(newpos), val(newval) {} 
   53     int     pos;
   54     Double  val;
   55 };
   56 
   57 /**
   58  * @brief This is the base interface class common to all state model classes
   59  * (ExonModel, IntronModel, IGenicModel, UTRModel)
   60  * 
   61  * @author Emmanouil Stafilarakis
   62  * @author Mario Stanke
   63  *
   64  */
   65 class StateModel {
   66 protected:
   67     StateModel() {}  // do not create StateModel object
   68 public:
   69     void initPredecessors(Matrix<Double>&, int self);
   70 
   71     // virtual methods to be implemented by the specialised classes
   72     virtual void registerPars(Parameters* parameters) {}
   73     virtual void buildModel(const AnnoSequence* annoseq, int parIndex) =0;
   74     virtual void printProbabilities(int zusNumber=1, BaseCount *bc = NULL, const char* suffix = NULL) =0;
   75     virtual StateType getStateType() const = 0;
   76     virtual void viterbiForwardAndSampling(ViterbiMatrixType&, ViterbiMatrixType&, int, int,
   77                        AlgorithmVariant, OptionListItem&) = 0;
   78     virtual Double emiProbUnderModel(int , int) const = 0;
   79     virtual void initAlgorithms(Matrix<Double>&, int) = 0;
   80     virtual void updateToLocalGCEach(Matrix<Double>& trans, int cur) { };
   81     virtual ~StateModel() {} // nothing to do here since class is purely abstract
   82 
   83     // class functions
   84     static void init();
   85     static StateModel* newStateModelPtr (const char* path);
   86     static void determineShortPatterns(const vector<Integer>& patcounts, int k, int minCount);
   87     static void makeProbsFromCounts(vector<Double > &patprobs , const vector<Integer > &patcounts, 
   88                     int k, Double pseudocount, Boolean shorten = false);
   89     static void computeEmiFromPat(const vector<Double>& patprobs, vector<Double>& emiprobs, Integer k);
   90     static void prepareViterbi(const char* dna, int len, const vector<StateType> &stateMap);
   91     static void readProbabilities(int);
   92     static void resetPars();
   93     // this is done when gcIdx changes once per state class
   94     static void updateToLocalGC(int idx, int from = -1, int to = -1);
   95     static void readAllParameters();
   96     static void storeGCPars(int);
   97     static void resetModelCounts();
   98     static bool isPossibleDSS(int pos) {
   99     return pos >= 1 && pos <= dnalen-2 &&
  100         (onGenDSS(sequence + pos) || 
  101          seqFeatColl->isHintedDSS(pos, plusstrand));
  102     } 
  103     static bool isPossibleRDSS(int pos) {
  104     return pos >= 1 && pos <= dnalen-2 &&
  105         (onGenRDSS(sequence + pos -1) ||
  106          seqFeatColl->isHintedDSS(pos, minusstrand));
  107     } 
  108     static bool isPossibleASS(int pos) {
  109     return pos >= 1 && pos <= dnalen-2 &&
  110         (onASS(sequence + pos -1) ||
  111          seqFeatColl->isHintedASS(pos, plusstrand));
  112     } 
  113     static bool isPossibleRASS(int pos) {
  114     return pos >= 1 && pos <= dnalen-2 &&
  115         (onRASS(sequence + pos) ||
  116          seqFeatColl->isHintedASS(pos, minusstrand));
  117     } 
  118     static void setSFC(SequenceFeatureCollection *psfc) {
  119     seqFeatColl = psfc;
  120     }
  121     static void setPP(PP::SubstateModel* mdl) {
  122     profileModel = mdl;
  123     }
  124     static void setCountRegion(int from, int to){countStart = from; countEnd = to;}
  125     static int getActiveWindowStart(int);
  126     static void setGCIdx(int idx) {gcIdx = idx;}
  127     static void setContentStairs(ContentStairs *stairs) {cs = stairs;}
  128     static int getGCIdx(int at){if (cs) return cs->idx[at]; else return -1;}
  129 protected:
  130     // variable unique to each model
  131     vector<Ancestor>  ancestor;    // predecessor in the state transition graph
  132 
  133     // class variables shared by all models
  134     static const vector<StateType>* stateMap;  // needed in exonmodel 
  135     static const char* sequence;   // the sequence currently examined
  136     static int dnalen;
  137     static SequenceFeatureCollection* seqFeatColl;
  138     static vector<Boolean>* shortpattern;
  139     static PP::SubstateModel* profileModel;
  140     static int countStart, countEnd; // this is the range of positions over which features are counted in CRF training
  141     static int activeWinLen;         // states ending before the active window will be deleted if they are not yet used
  142     static int gcIdx; // GC content class index for all states
  143     static ContentStairs *cs;
  144 }; // class StateModel
  145 
  146 
  147 /**
  148  * @brief intelligently store and retrieve the sequence emission probabilities 
  149  *        of the sequence from a to b for common pairs of a and b
  150  * 
  151  * @author Emmanouil Stafilarakis
  152  * @author Mario Stanke
  153  */
  154 class SnippetListItem {
  155 public:
  156     SnippetListItem (Double p, int length) { prob = p; len = length; next = NULL;}
  157     ~SnippetListItem() {
  158     if (next)
  159         delete next;
  160     }
  161     Double prob;
  162     int len;
  163     SnippetListItem *next;
  164 };
  165 
  166 /**
  167  * @author Emmanouil Stafilarakis
  168  * @author Mario Stanke
  169  */ 
  170 class SnippetList {
  171 public:
  172     Double getProb(int base, int &partlen);
  173     SnippetList() {first = last = NULL;}
  174     ~SnippetList(){}
  175     SnippetListItem *first, *last;
  176 };
  177 
  178 /**
  179  * @author Emmanouil Stafilarakis
  180  * @author Mario Stanke
  181  */
  182 class SnippetProbs {
  183 public:
  184     SnippetProbs(const char* dna, int k, bool forwardStrand=true){
  185     this->dna = dna;
  186     this->k = k;
  187     this->forwardStrand = forwardStrand;
  188     if (dna)
  189       n = strlen(dna);
  190     else 
  191       n = 0;
  192     snippetlist = new SnippetList*[n];
  193     for (int i=0; i<n; i++)
  194         snippetlist[i] = NULL;
  195     }
  196     Double getSeqProb(int base, int len);
  197     ~SnippetProbs () {
  198     if (snippetlist) {
  199         for (int i=0; i<n; i++) {
  200         if (snippetlist[i]){
  201             delete snippetlist[i]->first;
  202             delete snippetlist[i];
  203         }
  204         }
  205         delete [] snippetlist;
  206     }
  207     }
  208     void setEmiProbs(vector<Double> *emiprobs) {
  209     this->emiprobs = emiprobs;
  210     }
  211 
  212     void addProb(int base, int len, Double p);
  213 
  214 protected:
  215     const char *dna;
  216     int n;
  217     vector<Double> *emiprobs;
  218     int k;
  219     SnippetList **snippetlist;
  220     bool forwardStrand;
  221 private:
  222     Double getElemSeqProb(int base, int len);
  223 };
  224 
  225 /**
  226  * @brief another class for caching probabilities of sequence segments
  227  * @details idea: cumulative product => get segment prob in constant time via a ratio 
  228  * 
  229  * @author Emmanouil Stafilarakis
  230  * @author Mario Stanke
  231  */
  232 class SegProbs {
  233 public:
  234     SegProbs(const char* dna, int k, bool forwardStrand=true) : s2i(k+1) {
  235     this->dna = dna;
  236     this->k = k;
  237     this->forwardStrand = forwardStrand;
  238     if (dna)
  239         n = strlen(dna);
  240     else 
  241         n = 0;
  242     cumProds = new Double[n+1];
  243     }
  244     ~SegProbs () { delete [] cumProds; }
  245     void setEmiProbs(vector<Double> *emiprobs, int from = -1, int to = -1);
  246     Double getSeqProb(int from, int to);
  247     int getN() { return n; }
  248 protected:
  249     const char *dna;
  250     int n;
  251     int k;
  252     Seq2Int s2i;
  253     vector<Double> *emiprobs;
  254     Double *cumProds;
  255     bool forwardStrand;
  256 };
  257 
  258 /**
  259  * @brief data structure to store possible endOfPred positions to iterate directly
  260  * only over those endOfPred positions, that are possible in the inner loop of
  261  * the Viterbi algorithm.
  262  * 
  263  * @author Emmanouil Stafilarakis
  264  * @author Mario Stanke
  265  */
  266 class EOPList {
  267 public:
  268    void clear() { possibleEndOfPreds.clear(); }
  269    void init() {
  270       eopit = possibleEndOfPreds.begin();
  271       inCache = false;
  272    }
  273    void decrement(int &endOfPred);
  274    void update(int endOfPred);
  275    list<int> possibleEndOfPreds;
  276    list<int>::iterator eopit;
  277    bool inCache;
  278 };
  279 
  280 
  281 #endif  //  _STATEMODEL_HH