"Fossies" - the Fresh Open Source Software Archive

Member "augustus-3.3.3/src/hints.cc" (13 Sep 2019, 31345 Bytes) of package /linux/misc/augustus-3.3.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "hints.cc" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.3.2_vs_3.3.3.

    1 /*
    2  * hints.cc
    3  *
    4  * License: Artistic License, see file LICENSE.TXT or 
    5  *          https://opensource.org/licenses/artistic-license-1.0
    6  * 
    7  * Description: hints, user constraints
    8  */
    9 
   10 #include "hints.hh"
   11 #include "properties.hh"
   12 #include "projectio.hh"
   13 
   14 #include <iostream>
   15 #include <cstdlib>
   16 #include <set>
   17 
   18 const char* featureTypeNames[NUM_FEATURE_TYPES]= {"start", "stop", "ass", "dss", "tss", "tts",
   19                           "exonpart", "exon", "intronpart", "intron",
   20                           "irpart", "CDS", "CDSpart", "UTR", "UTRpart", "nonexonpart", "genicpart"};
   21 
   22 bool isSignalType(FeatureType type){
   23     return (type == startF || type == stopF || type == assF || type == dssF || type == tssF || type == ttsF);
   24 }
   25 
   26 bool isGFF(ifstream &ifstrm){
   27     ifstrm.seekg(0);
   28     // skip comments  
   29    ifstrm >> comment >> ws;
   30     if(!(ifstrm))
   31         return false;
   32     streampos spos = ifstrm.tellg();
   33     string line;
   34     getline(ifstrm,line);
   35     size_t pos = line.find('\t');
   36     if (pos != string::npos) {
   37         ifstrm.clear();
   38         ifstrm.seekg(spos);
   39         return true;
   40     }
   41     // line not tap separated   
   42     return false;
   43 }
   44 
   45 ostream& operator<<(ostream&out, Feature& feature){
   46     out << feature.seqname << "\t"
   47     << feature.source << "\t"
   48     << feature.feature << "\t"
   49     << Feature::offset + feature.start + 1 << "\t" 
   50     << Feature::offset + feature.end + 1 << "\t"
   51     << feature.score << "\t";
   52     switch (feature.strand) {
   53     case plusstrand: out << "+"; break;
   54     case minusstrand: out << "-"; break;
   55     default: out << ".";
   56     }
   57     out << "\t";
   58     if (feature.frame != -1)
   59     out << feature.frame;
   60     else 
   61     out << ".";
   62     out << "\t";
   63     if (feature.groupname != "")
   64     out << "grp=" << feature.groupname << ";";
   65     if (feature.mult > 1)
   66     out << "mult=" << feature.mult << ";";
   67     if (feature.priority >= 0)
   68     out << "pri=" << feature.priority << ";";
   69     out << "src=" << feature.esource
   70     //<< feature.attributes
   71     << " \"" << feature.bonus << ";" << feature.malus << ";" << feature.numSupporting << ":" << feature.numContradicting <<"\"";
   72     return out;
   73 }
   74 
   75 istream& operator>>( istream& in, Feature& feature ){
   76     char buff[1024]; 
   77     char copybuff[1024];
   78     char *temp;
   79     const char *spos;
   80     try {
   81     in.getline(buff, 1024);
   82     strncpy(copybuff, buff, 1014);
   83 
   84     if (strstr(buff, "\t")==NULL) {
   85         throw ProjectError("Line not tab separated.");
   86     }
   87     temp = strtok(buff, "\t");
   88     if (temp)
   89         feature.seqname = temp;
   90     else 
   91         throw ProjectError("Could not read sequence name.");
   92     
   93     temp = strtok(NULL, "\t");
   94     if (temp)
   95         feature.source = temp;
   96     else
   97         throw ProjectError("Could not read second column.");
   98     temp = strtok(NULL, "\t");
   99     if (temp)
  100         feature.feature = temp;
  101     else 
  102         throw ProjectError("Could not read feature type.");
  103     temp = strtok(NULL, "\t");
  104     if (temp)
  105         feature.start = atoi(temp);
  106     else 
  107         throw ProjectError("Could not read start position.");
  108     temp = strtok(NULL, "\t");
  109     if (temp)
  110         feature.end = atoi(temp);
  111     else 
  112         throw ProjectError("Could not read end position.");
  113     temp = strtok(NULL, "\t");
  114     if (temp) 
  115         feature.score = atof(temp);
  116     else 
  117         throw ProjectError("Could not read score.");
  118     temp = strtok(NULL, "\t"); // stand
  119     if (!temp)
  120         throw ProjectError("Could not read strand.");
  121     if (strcmp(temp, "+") == 0)
  122         feature.strand = plusstrand;
  123     else if (strcmp(temp, "-") == 0)
  124         feature.strand = minusstrand;
  125     else 
  126         feature.strand = STRAND_UNKNOWN;
  127     temp = strtok(NULL, "\t"); // frame
  128     if (!temp)
  129         throw ProjectError("Could not read frame.");
  130     if (strcmp(temp, "0") == 0)
  131         feature.frame = 0;
  132     else if (strcmp(temp, "1") == 0)
  133         feature.frame = 1;
  134     else if (strcmp(temp, "2") == 0)
  135         feature.frame = 2;
  136     else 
  137         feature.frame = -1;
  138     
  139     temp = strtok(NULL, "\t");
  140     if (temp){
  141         feature.attributes = temp;
  142         //replace dos carriage return by space
  143         for (int i=0; i < feature.attributes.length(); i++) 
  144         if (feature.attributes[i]=='\r')
  145             feature.attributes[i]=' ';
  146     }
  147     else 
  148         throw ProjectError("Could not read last column.");
  149     
  150     /*
  151      * find groupname of hint, specified in gff as: group=xxx; or grp=xxx;
  152      */
  153     spos = strstr(feature.attributes.c_str(), "group=");
  154     if (spos)
  155         spos += 6;
  156     if (!spos) {
  157         spos = strstr(feature.attributes.c_str(), "grp=");
  158         if (spos)
  159         spos += 4;
  160     }
  161     if (spos){
  162         int skeylen=1;
  163         while (*(spos+skeylen) != ';' && *(spos+skeylen) != ' ' && *(spos+skeylen) != '\0'){
  164         skeylen++;
  165         }
  166         feature.groupname = string(spos, skeylen);
  167     } else {
  168         feature.groupname = "";
  169     }
  170     
  171     /*
  172      * find priority of hint, specified in gff as: priority=N; or pri=N;
  173      * higher number means higher priority
  174      */
  175     spos = strstr(feature.attributes.c_str(), "priority=");
  176     if (spos)
  177         spos += 9;
  178     if (!spos) {
  179         spos = strstr(feature.attributes.c_str(), "pri=");
  180         if (spos)
  181         spos += 4;
  182     }
  183     if (spos){
  184         feature.priority = atoi(spos);
  185     } else {
  186         feature.priority = -1;
  187     }
  188     /*
  189      * find multiplicity of hint, specified in gff as: mult=N;
  190      */
  191     spos = strstr(feature.attributes.c_str(), "mult=");
  192     if (spos)
  193         spos += 5;
  194     if (spos)
  195         feature.mult = atoi(spos);
  196     else
  197         feature.mult = 1;
  198     
  199     /*
  200      * find source of extrinsic info, specified in gff as: source=X or src=X
  201      */
  202     spos = strstr(feature.attributes.c_str(), "source=");
  203     if (spos)
  204         spos += 7;
  205     if (!spos) {
  206         spos = strstr(feature.attributes.c_str(), "src=");
  207         if (spos)
  208         spos += 4;
  209     }
  210     if (spos && isalpha(*spos)){
  211         int skeylen=1;
  212         while (isalpha(*(spos+skeylen))){
  213         skeylen++;
  214         }
  215         feature.esource = string(spos, skeylen);
  216     } else {
  217         cerr << "Error in hint line: " << copybuff << endl;
  218         cerr << "No source specified (e.g. by source=M in the last column)" << endl;
  219     }
  220     feature.gradeclass = 0;// is set by class FeatureCollection
  221     
  222     feature.start--; // shift by -1, cause I start indexing with 0
  223     feature.end--;   // shift by -1, cause I start indexing with 0
  224     
  225     feature.type = Feature::getFeatureType(feature.feature); // may fail: type = -1
  226     } catch (ProjectError &e) {
  227     cerr << "Error in hint line: " << copybuff << endl;
  228     cerr << e.getMessage() << endl;
  229     cerr << "Maybe you used spaces instead of tabulators?" << endl;
  230     throw e;
  231     }
  232     return in;
  233 }
  234 
  235 int Feature::offset = 0;
  236 
  237 FeatureType Feature::getFeatureType(string typestring){
  238     FeatureType type;
  239     if (typestring == "dss")
  240     type = dssF;
  241     else if (typestring == "ass")
  242     type = assF;
  243     else if (typestring == "stop")
  244     type = stopF;
  245     else if (typestring == "start")
  246     type = startF;
  247     else if (typestring == "exonpart" || typestring == "ep")
  248     type = exonpartF;
  249     else if (typestring == "exon")
  250     type = exonF;
  251     else if (typestring == "intronpart" || typestring == "ip")
  252     type = intronpartF;  
  253     else if (typestring == "intron")
  254     type = intronF;
  255     else if (typestring == "tss")
  256     type = tssF;
  257     else if (typestring == "tts")
  258     type = ttsF;
  259     else if (typestring == "irpart")
  260     type = irpartF;
  261     else if (typestring == "CDS")
  262     type = CDSF;
  263     else if (typestring == "CDSpart" || typestring == "cp") 
  264     type = CDSpartF;
  265     else if (typestring == "UTR")
  266     type = UTRF;
  267     else if (typestring == "UTRpart" || typestring == "up")
  268     type = UTRpartF;
  269     else if (typestring == "nonexonpart" || typestring == "nep")
  270     type = nonexonpartF;
  271     else if (typestring == "nonirpart" || typestring == "genicpart")
  272     type = nonirpartF;
  273     else {
  274       cerr << "Unknown hint feature '" << typestring << "'. Ignoring it. For a list of the 17 allowed features ";
  275       cerr << "see the first column of the table in config/extrinsic/extrinsic.cfg." << endl;
  276       type = (FeatureType) -1;
  277     }
  278     return type;
  279 }
  280 
  281 FeatureType Feature::getFeatureType(int typeint){
  282     if(typeint < NUM_FEATURE_TYPES && typeint >=0)
  283     return (FeatureType)typeint;
  284     return (FeatureType) -1;
  285 }
  286 
  287 /*
  288  * Feature::compatibleWith
  289  * True if a gene structure could exist that is compatible with both features.
  290  */
  291 bool Feature::compatibleWith(Feature &other){
  292     const int transcript_fuzzy_margin = 50; // a tss/tts at most this far from the end of an exon/UTR end is still considered compatible
  293     const int term3_M = 1000; // two tts at most this far apart but not overlapping and not suggesting tail to tail genes are incompatible
  294     const int term5_M = 0; // two tss at most this far apart but not overlapping and not suggesting head to head genes are incompatible
  295     
  296     if (start > other.end || end < other.start){ // no overlap
  297     if ((type == tssF && other.type == tssF) &&
  298         (abs((end+start)-(other.end+other.start))/2 <= term5_M) && // close
  299         (strand == other.strand)){
  300         return false; // tss on same strand and close but not overlapping (macro-heterogeneity)
  301     }
  302     if ((type == ttsF && other.type == ttsF) &&
  303         (abs((end+start)-(other.end+other.start))/2 <= term3_M) && // close
  304         (strand == other.strand)){
  305         return false; // tts on same strand and close but not overlapping (macro-heterogeneity)
  306     }
  307     return true;
  308     }
  309     
  310     // overlapping case follows
  311     // opposite strands are incompatible unless we have at least one signal type
  312     if ((strand == minusstrand && other.strand == plusstrand) || (strand == plusstrand && other.strand == minusstrand)) {
  313     // this is an approximation, should take into account that min length of start and stop is 3
  314     if (isSignalType(type) && isSignalType(other.type))
  315         return true;
  316     if (isSignalType(type) && (start < other.start || end > other.end))
  317         return true;
  318     if (isSignalType(other.type) && (start > other.start || end < other.end))
  319         return true;
  320     return false;
  321     }
  322     // same strand (or bothstrands) follows
  323     if (type == other.type) { // same type
  324     if (start == other.start && end == other.end)
  325         return true;
  326     if (type == exonF || type == intronF || type == CDSF || type == UTRF)
  327         return false;
  328     return true;
  329     }
  330     Feature *f1=this, *f2=&other;
  331     // different types, wlg let f1 have the smaller index, otherwise swap
  332     if ((int) f1->type > (int) f2->type){
  333     f1 = &other;
  334     f2 = this;
  335     }
  336     if (f1->type == startF || f1->type == stopF){
  337     if ((f2->type == intronpartF || f2->type == intronF || f2->type == irpartF || f2->type == nonexonpartF || f2->type == UTRF || f2->type == UTRpartF) &&
  338         f1->start>= f2->start && f1->end <= f2->end) // start/stop interval included in non-CDS interval
  339         return false;
  340     if (f2->type == CDSpartF || f2->type == CDSF){
  341         if (f1->start > f2->start && f1->end < f2->end) // start/stop interval strictly included in CDS interval
  342         return false;
  343         // rest: true overlap or start/stop is included in CDS/CDSpart and right at the end
  344         // must not be at the wrong end
  345         if (strand == plusstrand && ((f1->type == startF && f1->start > f2->start) || (f1->type == stopF && f1->end < f2->end)))
  346         return false;
  347         if (strand == minusstrand && ((f1->type == startF && f1->end < f2->end) || (f1->type == stopF && f1->start > f2->start)))
  348         return false;
  349         if (f2->type == CDSF && (f1->end < f2->start + 2 || f1->start > f2->end-2)) // overlap less than 3
  350         return false;
  351         return true;
  352     }
  353     return true;
  354     }
  355     if (f1->type == assF || f1->type == dssF){
  356     if ((f2->type == irpartF || f2->type == UTRF || f2->type == UTRpartF || f2->type == exonpartF || f2->type == exonF || f2->type == CDSF || f2->type == CDSpartF) &&
  357         f1->start>= f2->start && f1->end <= f2->end) // splice site interval included in non-CDS interval
  358         return false;
  359     if (f2->type == intronF || f2->type == intronpartF){
  360         if (f1->start > f2->start && f1->end < f2->end) // splice site interval strictly included in intron interval
  361         return false;
  362         // rest: true overlap or splice site is included in intron/intronpart and right at an end
  363         // must not be at the wrong end
  364         if (strand == plusstrand && ((f1->type == dssF && f1->start > f2->start) || (f1->type == assF && f1->end < f2->end)))
  365         return false;
  366         if (strand == minusstrand && ((f1->type == dssF && f1->end < f2->end) || (f1->type == assF && f1->start > f2->start)))
  367         return false;
  368         return true;
  369     }
  370     return true;
  371     }
  372     if (f1->type == tssF || f1->type == ttsF){
  373     if ((f2->type == irpartF || f2->type == intronF || f2->type == intronpartF || f2->type == nonexonpartF || f2->type == CDSF || f2->type == CDSpartF) &&
  374         f1->start >= f2->start && f1->end <= f2->end) // transcript start/end interval included in non-UTR interval
  375         return false;
  376     if (f2->type == UTRF || f2->type == UTRpartF || f2->type == exonF || f2->type == exonpartF){
  377         if (f1->start > f2->start + transcript_fuzzy_margin && f1->end < f2->end - transcript_fuzzy_margin) // transcript start/end interval strictly included in UTR/exon interval
  378         return false;
  379         // rest: true overlap or transcript start/end site is included in UTR/exon and right at an end
  380         // must not be at the wrong end
  381         if (strand == plusstrand && ((f1->type == tssF && f1->start > f2->start + transcript_fuzzy_margin) || (f1->type == ttsF && f1->end < f2->end - transcript_fuzzy_margin)))
  382         return false;
  383         if (strand == minusstrand && ((f1->type == tssF && f1->end < f2->end - transcript_fuzzy_margin) || (f1->type == ttsF && f1->start > f2->start + transcript_fuzzy_margin)))
  384         return false;
  385         return true;
  386     }
  387     return true;
  388     }
  389     if (f1->type == exonpartF) {
  390     if (f2->type == intronpartF || f2->type == intronF || f2->type == irpartF || f2->type == nonexonpartF) // types that are simply not compatible with exonpart
  391         return false;
  392     if (f2->type == exonF && (f1->start < f2->start || f1->end > f2->end)) // exonpart not contained in exon
  393         return false;
  394     if (f2->type == UTRF && (f1->start < f2->start && f1->end > f2->end)) // exonpart properly contains UTR
  395         return false;
  396     return true;
  397     }
  398     if (f1->type == exonF) {
  399     if (f2->type == intronpartF || f2->type == intronF || f2->type == irpartF || f2->type == nonexonpartF) // types that are simple not compatible with exonpart
  400         return false;
  401     if (f2->type == CDSF && !(f1->start <= f2->start && f1->end >= f2->end)) // exon must contain CDS
  402         return false;
  403     if (f2->type == CDSpartF && (f1->start > f2->start || f1->end < f2->end)) // CDSpart covers something that exon does not cover
  404         return false;
  405     if (f2->type == UTRF &&
  406         !((f1->start == f2->start && f1->end >= f2->end) || (f1->end == f2->end && f1->start <= f2->end))) 
  407         return false;
  408     if (f2->type == UTRpartF && (f1->start > f2->start || f1->end < f2->end))
  409         return false;
  410     return true;
  411     }
  412     if (f1->type == intronpartF) {
  413     if (f2->type == intronF && (f1->start < f2->start || f1->end > f2->end))
  414         return false;
  415     if (f2->type == irpartF || f2->type == CDSF || f2->type == CDSpartF || f2->type == UTRF || f2->type == UTRpartF)
  416         return false;
  417     return true;
  418     }
  419     if (f1->type == intronF) {
  420     if (f2->type == irpartF || f2->type == CDSF || f2->type == CDSpartF || f2->type == UTRF || f2->type == UTRpartF)
  421         return false;
  422     return true;
  423     }
  424     if (f1->type == irpartF) {
  425     if (f2->type == nonexonpartF)
  426         return true;
  427     return false;
  428     }
  429     if (f1->type == CDSF) {
  430     if (f2->type == CDSpartF && (f1->start <= f2->start && f1->end >= f2->end)) // CDS contains CDSpart
  431         return true;
  432     return false;
  433     }
  434     if (f1->type == CDSpartF) 
  435     return false; // incompatible with UTRF, UTRpartF, nonexonpartF
  436     if (f1->type == UTRF){
  437     if (f2->type == UTRpartF && (f1->start <= f2->start && f1->end >= f2->end))
  438         return true;
  439     return false;
  440     }
  441     return false;
  442 }
  443 
  444 /*
  445  * Feature::weakerThan
  446  * True if every gene structure that is compatible with 'other' also is compatible with 'this'.
  447  * Strictly is true, if there could be gene structures that are only compatible with 'this' and not with other.
  448  * Usually, when a **partF interval is larger in other. 'strictly' is only correctly set when the return value is true.
  449  */
  450 bool Feature::weakerThan(Feature &other, bool &strictly){
  451     strictly = false;
  452     if (other.end < start || other.start > end)
  453     return false;
  454     if (type == other.type && start == other.start && end == other.end)
  455     return true;
  456     if (start != other.start || end != other.end)
  457     strictly = true;
  458     if (type == other.type && (type == startF || type == stopF || type == assF || type == dssF || type == tssF || type == ttsF) && (start <= other.start && end >= other.end))
  459     return true;
  460     if (type == exonpartF && (other.type == exonF || other.type == exonpartF) && (start >= other.start && end <= other.end))
  461     return true;
  462     if (type == intronpartF && (other.type == intronF || other.type == intronpartF) && (start >= other.start && end <= other.end))
  463     return true;
  464     if (type == irpartF && other.type == irpartF && (start >= other.start && end <= other.end))
  465     return true;
  466     if (type == CDSpartF && (other.type == CDSF || other.type == CDSpartF) && (start >= other.start && end <= other.end))
  467     return true;
  468     if (type == UTRpartF && (other.type == UTRF || other.type == UTRpartF) && (start >= other.start && end <= other.end))
  469     return true;
  470     if (type == nonexonpartF && other.type == nonexonpartF && (start >= other.start && end <= other.end))
  471     return true;
  472     if (type == nonirpartF && other.type != irpartF &&  (start >= other.start && end <= other.end))
  473     return true;
  474     return false;
  475 }
  476 
  477 /*
  478  * conformance
  479  * between 0 and 1. Close to 1 if lots of support and no contradiction.
  480  */
  481 double Feature::conformance(){
  482     int pseudoSupporting = 5, pseudoContradicting = 5;
  483     return (double) (pseudoSupporting + numSupporting) / (pseudoContradicting + pseudoSupporting + numSupporting + numContradicting);
  484 }
  485 
  486 /*
  487  * shift the coordinates of a feature by -seqStart
  488  * if seqStrand is minusstrand, the coordinates and strand of the
  489  * feature are set relative to the minusstrand 
  490  */
  491 void Feature::shiftCoordinates(int seqStart, int seqEnd, bool rc){
  492     if (!rc) {
  493     end -= seqStart;
  494     start -= seqStart;
  495     } else {
  496     int temp = end;
  497     end = seqEnd - start;
  498     start = seqEnd - temp;
  499     if (strand == plusstrand)
  500         strand = minusstrand;
  501     else if (strand == minusstrand)
  502         strand = plusstrand;
  503     }
  504 }
  505 
  506 void Feature::setFrame(string f){
  507     if(f == "0")
  508     frame=0;
  509     else if (f == "1")
  510     frame=1;
  511     else if (f == "2")
  512     frame=2;
  513     else
  514     frame=-1;
  515 }
  516 
  517 void Feature::setStrand(string s){
  518     if (s == "+")
  519     strand=plusstrand;
  520     else if (s == "-")
  521     strand=minusstrand;
  522     else
  523     strand=STRAND_UNKNOWN;
  524 }
  525 
  526 /*
  527  * operator<
  528  * is needed for the set operations in printAccuracyForSequenceSet
  529  * sorting is by increasing end positions
  530  */
  531 
  532 bool operator<(const Feature& f1, const Feature& f2){
  533     if (f1.end < f2.end)
  534     return true;
  535     else if (f1.end > f2.end)
  536     return false;
  537     if (f1.start < f2.start)
  538     return true;
  539     else if (f1.start > f2.start)
  540     return false;
  541     if (f1.strand < f2.strand)
  542     return true;
  543     else if (f1.strand > f2.strand)
  544     return false;
  545     if (f1.frame < f2.frame)
  546     return true;
  547     else if (f1.frame > f2.frame)
  548     return false;
  549     if (f1.esource < f2.esource && f2.esource != "annotrain" && f1.esource != "annotrain")
  550     return true;
  551     return false;
  552 }
  553 
  554 bool operator==(const Feature& f1, const Feature& f2){
  555     return ((f1.type == f2.type) && (f1.end == f2.end) && (f1.start == f2.start) && (f1.strand == f2.strand) && (f1.frame == f2.frame));
  556 }
  557 
  558 double Feature::distance_faded_bonus(int pos){
  559     double result;
  560     if (pos < start || pos > end)
  561     result=1.0;
  562     else {
  563     double delta = 2.0*(pos - (end+start)/2.0)/(end-start+1); 
  564     if (delta < 0)
  565         delta = -delta; // delta between 0 and 1, delta=0 when pos in the middle of hint interval, delta=1 when pos at an end of hint interval
  566     if (delta == 0.0)
  567         result=bonus; // shortcut to save the expensive log and exp operations
  568     else 
  569         result = exp(log(bonus) * (1-delta));
  570     }
  571     return result;
  572 }
  573 
  574 /*
  575  * HintGroup::addFeature
  576  * put the feature at the right place: sorted increasingly by end positions
  577  */
  578 void HintGroup::addFeature(Feature *hint){
  579     if (hints == NULL) {
  580     hints = new list<Feature*>;
  581     name = hint->groupname;
  582     }
  583 // TODO. This could be more efficient when the list is traversed from the end
  584     list<Feature*>::iterator fit = hints->begin();
  585     while(fit != hints->end() && (*fit)->end < hint->end)
  586     fit++;
  587     hints->insert(fit, hint);
  588     // if feature is genic then reset begin and end if necessary
  589     // nongenic features: irpartF, nonexonpartF
  590     if (begin < 0 || begin > hint->start)
  591     begin = hint->start;
  592     if (end < 0 || end < hint->end)
  593     end = hint->end;
  594     if (hint->type != irpartF && hint->type != nonexonpartF) {
  595     if (geneBegin <0 || geneBegin > hint->start)
  596         geneBegin = hint->start;
  597     if (geneEnd <0 || geneEnd < hint->end)
  598         geneEnd = hint->end;
  599     }
  600     if (hint->priority > priority)
  601     priority = hint->priority;
  602     if (hint->mult > copynumber)
  603       copynumber = hint->mult; 
  604 }
  605 
  606 /*
  607  * HintGroup::compatibleWith
  608  * Two HintGroups g1, g2 are compatible if a gene structure could exist that is compatible with both HintGroups.
  609  * (Could exist, because I don't check for ORFs and such.)
  610  * All pairs of features f1 in g2 and f2 in g2 that overlap must be compatible.
  611  * rascal1 and rascal2 are the hints that caused the incompatibility if any
  612  * weakerThan is true if this is strictly weaker than 'other'.
  613  */
  614 bool HintGroup::compatibleWith(HintGroup &other, Feature *&rascal1, Feature *&rascal2, bool &weakerThan){
  615     bool compatible = true;
  616     weakerThan = false;
  617     bool strictly = false;
  618     rascal1 = rascal2 = NULL;
  619     if (begin>other.end || end<other.begin)
  620     return compatible;
  621     // go through both feature lists and check all pairs of overlapping features
  622     if (!hints || !other.hints) 
  623     return true;
  624     bool featureWeakerThan, wt, sly;
  625     weakerThan = true;
  626     for (list<Feature*>::iterator f1it = hints->begin(); f1it != hints->end() && compatible; f1it++) {
  627     featureWeakerThan = false;
  628     for (list<Feature*>::iterator f2it = other.hints->begin(); f2it != other.hints->end() && compatible; f2it++) {
  629         compatible &= (*f1it)->compatibleWith(**f2it);
  630         if (!compatible){
  631         rascal1 = *f1it;
  632         rascal2 = *f2it;
  633         }
  634         wt = (*f1it)->weakerThan(**f2it, sly);
  635         featureWeakerThan |= wt;
  636         if (wt)
  637         strictly |= sly;
  638     }
  639     weakerThan &= featureWeakerThan;
  640     }
  641     if (weakerThan && !strictly) {
  642     // check if proper 'weaker than' relation comes from a subset of features (as opposed to a single feature being weaker)
  643     // make a simple approximation. If a feature of other lies completely outside the range of this and this is weaker than other
  644     // then it is also strictly weaker
  645     for (list<Feature*>::iterator f2it = other.hints->begin(); f2it != other.hints->end() && compatible; f2it++)
  646         if ((*f2it)->end < begin || (*f2it)->start > end) 
  647         strictly = true;
  648     }
  649     weakerThan &= strictly;
  650     if (!compatible)
  651     weakerThan = false;
  652     return compatible;
  653 }
  654 
  655 
  656 /*
  657  * HintGroup::updateFeatureConformance
  658  * Updates conformance of the Features of this group to the other group.
  659  * For each Feature f of 'this', numSupporting is increased by the multiplicity of 'other'
  660  * if there is one Feature in 'other' that supports f. numContradicting is increased for f if 
  661  * there is at least one hint in 'other' that contradicts f.
  662  */
  663 void HintGroup::updateFeatureConformance(HintGroup &other){
  664     if (end < other.begin || begin > other.end || hints == NULL || other.hints == NULL)
  665     return;
  666     // If this is uncommented then minor "dirt" exonpart hints in a well-supported intron are taken
  667     // seriously.
  668     //if (nestedGenePossible(other))
  669     //return;
  670     if (this == &other) {
  671     for(list<Feature*>::iterator f = hints->begin(); f != hints->end(); f++)
  672         (*f)->numSupporting += copynumber - 1;
  673     return;
  674     }
  675     bool supporting, contradicting, strictly, only_ep_conflicting_intron;
  676     bool lowerpriority = (other.priority < priority && other.priority >=0);
  677     float fractContra = 1.0;
  678     for(list<Feature*>::iterator f = hints->begin(); f != hints->end(); f++){
  679     supporting = false;
  680     contradicting = false;
  681     only_ep_conflicting_intron = true;
  682     for(list<Feature*>::iterator otherF = other.hints->begin(); otherF != other.hints->end(); otherF++){
  683         if (!lowerpriority && !(*f)->compatibleWith(**otherF)){
  684              contradicting = true;
  685          /*
  686           * special case: the amount an exonpart contradicts an intron depends on the lengths of the two
  687           * Reason: Assume constant ep length and constant splice frequencies. Then a longer intron
  688           * will have more ep hints that contradict it.
  689           */
  690          if ((*f)->type == intronF && ((*otherF)->type == exonpartF || 
  691                            (*otherF)->type == CDSpartF || 
  692                            (*otherF)->type == UTRpartF)){
  693            int ilen = (*f)->length();
  694            if (ilen > 2000) // exonpart hints covering a range of a full long exon can fully contradict an intron
  695              ilen = 2000;
  696            if (ilen < 1)
  697              ilen = 1;
  698            int eplen = (*otherF)->length();// todo: just consider overlap length
  699            if (eplen > ilen)
  700              eplen = ilen;
  701            fractContra = (float) eplen/ilen;
  702          } else {
  703            only_ep_conflicting_intron = false;
  704          }
  705         }
  706         
  707         if ((*f)->weakerThan(**otherF, strictly))
  708         supporting = true;
  709     }
  710     if (supporting && !contradicting)
  711         (*f)->numSupporting += other.copynumber;
  712     else if (contradicting){
  713         if (!only_ep_conflicting_intron)
  714             fractContra = 1.0;
  715         (*f)->numContradicting += fractContra * other.copynumber;
  716     }
  717     }
  718 }
  719 
  720 /*
  721  * HintGroup::nestedGenePossible
  722  * Return true if one hint group is contained in an intron of the other hintgroup.
  723  * Require minimal distance from intron of 50. This could be tightened by 
  724  * checking that the total mRNA of the inner gene must have enough space.
  725  */
  726 bool HintGroup::nestedGenePossible(HintGroup &other){
  727     for(list<Feature*>::iterator otherF = other.hints->begin(); otherF != other.hints->end(); otherF++)
  728     if ((*otherF)->type == intronF && (*otherF)->start < begin -50 && (*otherF)->end > end +50)
  729         return true;
  730     for(list<Feature*>::iterator thisF = hints->begin(); thisF != hints->end(); thisF++)
  731     if ((*thisF)->type == intronF && (*thisF)->start < other.begin -50 && (*thisF)->end > other.end +50)
  732         return true;
  733     return false;
  734 }
  735 
  736 /*
  737  * HintGroup::isTrashy
  738  * Mark the group to be discarded iff any feature of the group contradicts a large number of features of other groups
  739  * and is not supported by a sufficient number of other groups.
  740  */
  741 bool HintGroup::isTrashy(){
  742     if (hints == NULL)
  743     return false;
  744     float c;
  745     int s;
  746     list<Feature*>::iterator hit = hints->begin();
  747     while(hit != hints->end() && !trashy) {
  748     c = (*hit)->numContradicting;
  749     s = (*hit)->numSupporting;
  750     if (c/(s+1) >= Constant::max_contra_supp_ratio)
  751         trashy = true;
  752     /* was
  753        (s == 0 && c>=10) ||
  754        (s == 1 && c>=15) ||
  755        (s == 2 && c>=25) ||
  756        (s == 3 && c>=45) ||
  757        (s == 4 && c>=60) ||
  758        (s >= 5 && c/s >= 20))
  759     */
  760     hit++;
  761     }
  762     if (trashy) {
  763     //cout << "This group is trashy:"<<endl;
  764     //print(cout, true);
  765     setDiscardFlag(true);
  766     }
  767     return trashy;
  768 }
  769 
  770 /*
  771  * HintGroup::canCauseAltSplice
  772  * optional TODO: also implement a minimal multiplicity, e.g. intron:3,exon:1
  773  */
  774 bool HintGroup::canCauseAltSplice() {
  775   static set<FeatureType> *causingTypes = NULL;
  776   if (causingTypes == NULL){ // initialize on first call
  777     causingTypes = new set<FeatureType>;
  778     char *canCauseAltSpliceList = NULL;
  779     try {
  780       canCauseAltSpliceList = newstrcpy(Properties::getProperty( "canCauseAltSplice" ));
  781     } catch(...) {
  782       canCauseAltSpliceList = newstrcpy("intron,exon,tss,tts,start,stop,ass,dss,ip,CDS,CDSpart,UTR,UTRpart"); // default
  783     }
  784     char *typestr = strtok(canCauseAltSpliceList, ",");
  785     while (typestr != NULL) {
  786       FeatureType type = Feature::getFeatureType(typestr);
  787       if (type >= 0)
  788     causingTypes->insert(type);
  789       typestr = strtok (NULL, ",");
  790     }
  791     if (canCauseAltSpliceList)
  792       delete [] canCauseAltSpliceList;
  793   }
  794   bool ret = false;
  795   if (hints){
  796     list<Feature*>::iterator fit = hints->begin();
  797     while(ret == false && fit != hints->end()){
  798       ret |= (causingTypes->find((*fit)->type) != causingTypes->end()); // can cause altsplice if ANY of the types in the group match
  799       fit++;
  800     }
  801   }
  802   // was before a hack (for rGASP):
  803   // return hints && (hints->size()>1 || (hints->front()->type != exonpartF));
  804   return ret;
  805 }
  806 
  807 /*
  808  * HintGroup::setActiveFlag
  809  */
  810 void HintGroup::setActiveFlag(bool active){
  811     if (!hints)
  812     return;
  813     for (list<Feature*>::iterator hit = hints->begin(); hit != hints->end(); hit++)
  814     (*hit)->active = active;
  815 }
  816 
  817 /*
  818  * HintGroup::setDiscardFlag
  819  */
  820 void HintGroup::setDiscardFlag(bool discard){
  821     if (!hints)
  822     return;
  823     for (list<Feature*>::iterator hit = hints->begin(); hit != hints->end(); hit++){
  824     (*hit)->discard = discard;
  825     }
  826 }
  827 
  828 /*
  829  * HintGroup::addIncompGroup
  830  */
  831 void HintGroup::addIncompGroup(HintGroup *otherGroup){
  832     if (!otherGroup)
  833     return;
  834     if (otherGroup == this) {
  835     cerr << "addIncompGroup:Group incompatible to itself" << endl;
  836     return;
  837     }
  838     if (!incompGroups)
  839     incompGroups = new list<HintGroup*>;
  840     incompGroups->push_back(otherGroup);
  841 }
  842 
  843 /*
  844  * HintGroup::addStrongerGroup
  845  */
  846 void HintGroup::addStrongerGroup(HintGroup *otherGroup){
  847     if (!otherGroup)
  848     return;
  849     if (otherGroup == this) {
  850     cerr << "addStrongerGroup:Group stronger than itself" << endl;
  851     return;
  852     }
  853     if (!strongerGroups)
  854     strongerGroups = new list<HintGroup*>;
  855     strongerGroups->push_back(otherGroup);
  856 }
  857 
  858 /*
  859  * HintGroup::operator<
  860  * for sorting incrementally according to begin positions
  861  */
  862 bool operator<(const HintGroup& g1, const HintGroup& g2){    
  863     if (g1.begin < g2.begin)
  864     return true;
  865     if (g1.begin > g2.begin)
  866     return false;
  867     // both groups begin at the same position
  868     // go through the groups feature by feature and return true if the first differing feature is < in g1
  869     if (!g1.hints || !g2.hints)
  870     return false;
  871     list<Feature*>::iterator fit1 = g1.hints->begin(), fit2 = g2.hints->begin();
  872     while(fit1 != g1.hints->end() && fit2 != g2.hints->end()) {
  873     if ((*fit1)->start < (*fit2)->start || ((*fit1)->start == (*fit2)->start && (*fit1)->end < (*fit2)->end))
  874         return true;
  875     if (!((*fit1)->start == (*fit2)->start && (*fit1)->end == (*fit2)->end))
  876         return false;
  877     fit1++;
  878     fit2++;
  879     }
  880     return false;
  881 }
  882 
  883 /*
  884  * HintGroup::operator==
  885  */
  886 bool operator==(const HintGroup& g1, const HintGroup& g2){
  887     if (g1.begin != g2.begin || g1.end != g2.end)
  888     return false;
  889     if (g1.hints == NULL || g2.hints == NULL || g1.hints->size() != g2.hints->size())
  890     return false;
  891     list<Feature*>::iterator fit1, fit2;
  892     for (fit1 = g1.hints->begin(), fit2 = g2.hints->begin(); fit1 != g1.hints->end() && fit2 != g2.hints->end(); fit1++, fit2++) {
  893     if (!((**fit1) == (**fit2)) || ((*fit1)->bonus != (*fit2)->bonus))
  894         return false;
  895     }
  896     if (fit1 == g1.hints->end() && fit2 == g2.hints->end())
  897     return true;
  898     else 
  899     return false;
  900 }
  901 
  902 
  903 
  904 /*
  905  * HintGroup::print
  906  */
  907 void HintGroup::print(ostream& out, bool withHints){
  908   if (hints == NULL) {
  909     out << "HintGroup " << name << ", " << begin << "-" << end << ", mult= " << copynumber << ", priority= " << priority << " no features" << endl;
  910   } else {
  911     out << "HintGroup " << name << ", " << begin << "-" << end << ", mult= " << copynumber << ", priority= " << priority << " " << hints->size() << " features" << endl;
  912     if (withHints)
  913       for (list<Feature*>::iterator fit = hints->begin(); fit!= hints->end(); fit++)
  914     out << "\t" << **fit << endl;
  915   }
  916 }
  917