"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/word_and_context_list.cpp" (19 Feb 2021, 4442 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "word_and_context_list.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.4.5_vs_1.5.0.

    1 /**
    2  * class word_and_context_list reads from disk and maintains in memory
    3  * a data structure that is used for the stop list and alert list.
    4  */
    5 
    6 #include "config.h"
    7 #include <sys/types.h>
    8 #include <inttypes.h>
    9 #include "word_and_context_list.h"
   10 #include "beregex.h"
   11 
   12 void word_and_context_list::add_regex(const std::string &pat)
   13 {
   14     patterns.push_back(new beregex(pat,0));
   15 }
   16 
   17 /**
   18  * Insert a feature and context, but only if not already present.
   19  * Returns true if added.
   20  */
   21 bool word_and_context_list::add_fc(const std::string &f,const std::string &c)
   22 {
   23     context ctx(f,c);           // ctx includes feature, before and after
   24 
   25     if(c.size()>0 && context_set.find(c) != context_set.end()) return false; // already present
   26     context_set.insert(c);      // now we've seen it.
   27     fcmap.insert(std::pair<std::string,context>(f,ctx));
   28     //if(fcmap.size()%100==0) std::cerr << "fcmap size=" << fcmap.size()  << "\n";
   29     return true;
   30 }
   31 
   32 /** returns 0 if success, -1 if fail. */
   33 int word_and_context_list::readfile(const std::string &filename)
   34 {
   35     std::ifstream i(filename.c_str());
   36     if(!i.is_open()) return -1;
   37     printf("Reading context stop list %s\n",filename.c_str());
   38     std::string line;
   39     uint64_t total_context=0;
   40     uint64_t line_counter = 0;
   41     uint64_t features_read = 0;
   42     while(getline(i,line)){
   43     line_counter++;
   44     if(line.size()==0) continue;
   45     if(line[0]=='#') continue; // it's a comment
   46     if((*line.end())=='\r'){
   47         line.erase(line.end()); /* remove the last character if it is a \r */
   48     }
   49     if(line.size()==0) continue;    // no line content
   50     ++features_read;
   51 
   52     // If there are two tabs, this is a line from a feature file
   53     size_t tab1 = line.find('\t');
   54     if(tab1!=std::string::npos){
   55         size_t tab2 = line.find('\t',tab1+1);
   56         if(tab2!=std::string::npos){
   57         size_t tab3 = line.find('\t',tab2+1);
   58         if(tab3==std::string::npos) tab3=line.size();
   59                 std::string f = line.substr(tab1+1,(tab2-1)-tab1);
   60                 std::string c = line.substr(tab2+1,(tab3-1)-tab2);
   61         if(add_fc(f,c)){
   62             ++total_context;
   63         }
   64         } else {
   65                 std::string f = line.substr(tab1+1);
   66         add_fc(f,"");       // Insert a feature with no context
   67         }
   68         continue;
   69     }
   70 
   71     // If there is no tab, then this must be a simple item to ignore.
   72     // If it is a regular expression, add it to the list of REs
   73     if(beregex::is_regex(line)){
   74         patterns.push_back(new beregex(line,REG_ICASE));
   75     } else {
   76         // Otherwise, add it as a feature with no context
   77         fcmap.insert(std::pair<std::string,context>(line,context(line)));
   78     }
   79     }
   80     std::cout << "Stop list read.\n";
   81     std::cout << "  Total features read: " << features_read << "\n";
   82     std::cout << "  List Size: " << fcmap.size() << "\n";
   83     std::cout << "  Context Strings: " << total_context << "\n";
   84     std::cout << "  Regular Expressions: " << patterns.size() << "\n";
   85     return 0;
   86 }
   87 
   88 /** check() is threadsafe. */
   89 bool word_and_context_list::check(const std::string &probe,const std::string &before,const std::string &after) const
   90 {
   91     /* First check literals, because they are faster */
   92     for(stopmap_t::const_iterator it =fcmap.find(probe);it!=fcmap.end();it++){
   93     if((rstrcmp((*it).second.before,before)==0) &&
   94        (rstrcmp((*it).second.after,after)==0) &&
   95        ((*it).second.feature==probe)){
   96         return true;
   97     }
   98     }
   99 
  100     /* Now check the patterns; do this second */
  101     for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
  102     if((*it)->search(probe,0,0,0)){
  103         return true;        // yep
  104     }
  105     }
  106     return false;
  107 };
  108 
  109 bool word_and_context_list::check_feature_context(const std::string &probe,const std::string &context) const 
  110 {
  111     std::string before;
  112     std::string after;
  113     context::extract_before_after(probe,context,before,after);
  114     return check(probe,before,after);
  115 }
  116 
  117 void word_and_context_list::dump()
  118 {
  119     std::cout << "dump context list:\n";
  120     for(stopmap_t::const_iterator it =fcmap.begin();it!=fcmap.end();it++){
  121     std::cout << (*it).first << " = " << (*it).second << "\n";
  122     }
  123     std::cout << "dump RE list:\n";
  124     for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
  125     std::cout << (*it)->pat << "\n";
  126     }
  127 }
  128 
  129 #ifdef STAND
  130 int  main(int argc,char **argv)
  131 {
  132     cout << "testing contxt_list\n";
  133     word_and_context_list cl;
  134     while(--argc){
  135     argv++;
  136     if(cl.readfile(*argv)){
  137         err(1,"Cannot read %s",*argv);
  138     }
  139     }
  140     cl.dump();
  141     exit(1);
  142 }
  143 #endif