"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/word_and_context_list.h" (19 Feb 2021, 4962 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "word_and_context_list.h" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.4.5_vs_1.5.0.

    1 #ifndef WORD_AND_CONTEXT_LIST_H
    2 #define WORD_AND_CONTEXT_LIST_H
    3 
    4 #include "beregex.h"
    5 
    6 /**
    7  * \addtogroup internal_interfaces
    8  * @{
    9  * \file
   10  * word_and_context_list:
   11  *
   12  * A re-implementation of the basic stop list, regular expression
   13  * stop_list, and context-sensitive stop list.
   14  *
   15  * Method:
   16  * Each entry in the stop list can be represented as:
   17  * - a feature that is stopped, with optional context.
   18  * - a regular expression
   19  * 
   20  * Context is represented as a std::string before the feature and a std::string after.
   21  * 
   22  * The stop list is a map of features that are stopped.
   23  * For each feature, there may be no context or a list of contexts.
   24  * If there is no context and the feature is in the list, the feature is presumably stopped wherever it appears (TODO confirm; this sentence was left unfinished).
   25  */
   26 
   27 /*
   28  * context is a class that records the feature, the text before, and the text after.
   29  * Typically this is used for stop lists and alert lists. 
   30  */
   31 
   32 #if defined(HAVE_UNORDERED_SET)
   33 #include <unordered_set>
   34 #else
   35 #if defined(HAVE_TR1_UNORDERED_SET)
   36 #include <tr1/unordered_set>
   37 #endif
   38 #endif
   39 
   40 /* <unordered_map> includes both unordered_map and unordered_multimap */
   41 #if defined(HAVE_UNORDERED_MAP)
   42 #include <unordered_map>
   43 #else
   44 #if defined(HAVE_TR1_UNORDERED_MAP)
   45 #include <tr1/unordered_map>
   46 #endif
   47 #endif
   48 
   49 #include <algorithm>
   50 #include <set>
   51 #include <map>                          // brings in map and multimap
   52 
   class context {
   public:
       /**
        * Split a context string around the first occurrence of a feature.
        *
        * @param feature the text to look for inside \p ctx
        * @param ctx     the full context, expected to contain \p feature
        * @param before  receives the part of \p ctx preceding the feature
        * @param after   receives the part of \p ctx following the feature
        *
        * If \p feature does not occur in \p ctx, both \p before and \p after
        * are cleared.  A feature located at the very end of \p ctx is found
        * as well (the earlier sliding-window loop stopped one position short
        * and missed that case).
        */
       static void extract_before_after(const std::string &feature,const std::string &ctx,
                                        std::string &before,std::string &after){
           const std::string::size_type pos = ctx.find(feature);
           if(pos == std::string::npos){
               before.clear();         // can't be done
               after.clear();
               return;
           }
           // Build both pieces before touching the outputs, so the call stays
           // well-defined even if an output parameter refers to ctx itself.
           std::string head = ctx.substr(0,pos);
           std::string tail = ctx.substr(pos+feature.size());
           before.swap(head);
           after.swap(tail);
       }

       /** Make a context that has a feature but no text before or after it. */
       context(const std::string &f):feature(f),before(),after(){}

       /** Make a context from a feature and the full context string that surrounds it. */
       context(const std::string &f,const std::string &c):feature(f),before(),after(){
           extract_before_after(f,c,before,after);
       }

       /** Make a context from a feature and explicit before/after text. */
       context(const std::string &f,const std::string &b,const std::string &a):feature(f),before(b),after(a){}

       std::string feature;    // the feature itself
       std::string before;     // text preceding the feature
       std::string after;      // text following the feature
   };
   81 
   82 inline std::ostream & operator <<(std::ostream &os,const class context &c)
   83 {
   84     os << "context[" << c.before << "|" << c.feature  << "|" << c.after << "]";
   85     return os;
   86 }
   87 inline bool operator ==(const class context &a,const class context &b)
   88 {
   89     return (a.feature==b.feature) && (a.before==b.before) && (a.after==b.after);
   90 }
   91 
   92 /**
   93  * the object that holds the word and context list
   94  * They aren't atomic, but they are read-only.
   95  */
   96 class word_and_context_list {
   97 private:
   98 #if defined(HAVE_UNORDERED_MAP)
   99     typedef std::unordered_multimap<std::string,context> stopmap_t;
  100 #else
  101 #if defined(HAVE_TR1_UNORDERED_MAP)
  102     typedef std::tr1::unordered_multimap<std::string,context> stopmap_t;
  103 #else
  104     typedef std::multimap<std::string,context> stopmap_t;
  105 #endif
  106 #endif
  107     stopmap_t fcmap;            // maps features to contexts; for finding them
  108 
  109 #if defined(HAVE_UNORDERED_SET)
  110     typedef std::unordered_set< std::string > stopset_t;
  111 #else
  112 #if defined(HAVE_TR1_UNORDERED_SET)
  113     typedef std::tr1::unordered_set< std::string > stopset_t;
  114 #else
  115     typedef std::set< std::string > stopset_t;
  116 #endif
  117 #endif
  118     stopset_t context_set;          // presence of a pair in fcmap
  119 
  120     beregex_vector patterns;
  121 public:
  122     /**
  123      * rstrcmp is like strcmp, except it compares std::strings right-aligned
  124      * and only compares the minimum sized std::string of the two.
  125      */
  126     static int rstrcmp(const std::string &a,const std::string &b);
  127 
  128     word_and_context_list():fcmap(),context_set(),patterns(){ }
  129     ~word_and_context_list(){
  130     for(beregex_vector::iterator it=patterns.begin(); it != patterns.end(); it++){
  131         delete *it;
  132     }
  133     }
  134     size_t size(){ return fcmap.size() + patterns.size();}
  135     void add_regex(const std::string &pat); // not threadsafe
  136     bool add_fc(const std::string &f,const std::string &c); // not threadsafe
  137     int readfile(const std::string &fname); // not threadsafe
  138 
  139     // return true if the probe with context is in the list or in the stopmap
  140     bool check(const std::string &probe,const std::string &before, const std::string &after) const; // threadsafe
  141     bool check_feature_context(const std::string &probe,const std::string &context) const; // threadsafe
  142     void dump();
  143 };
  144 
  145 
  146 inline int word_and_context_list::rstrcmp(const std::string &a,const std::string &b)
  147 {
  148     size_t alen = a.size();
  149     size_t blen = b.size();
  150     size_t len = alen < blen ? alen : blen;
  151     for(size_t i=0;i<len;i++){
  152     size_t apos = alen - len + i;
  153     size_t bpos = blen - len + i;
  154     if(a[apos] < b[bpos]) return -1;
  155     if(a[apos] > b[bpos]) return 1;
  156     }
  157     return 0;
  158 }
  159 
  160 #endif