tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

word_and_context_list.cpp
Go to the documentation of this file.
1 /**
2  * class word_and_context_list reads from disk and maintains in memory
3  * a data structure that is used for the stop list and alert list.
4  */
5 
6 #include "config.h"
7 #include <sys/types.h>
8 #include <inttypes.h>
10 #include "beregex.h"
11 
12 void word_and_context_list::add_regex(const std::string &pat)
13 {
14  patterns.push_back(new beregex(pat,0));
15 }
16 
17 /**
18  * Insert a feature and context, but only if not already present.
19  * Returns true if added.
20  */
21 bool word_and_context_list::add_fc(const std::string &f,const std::string &c)
22 {
23  context ctx(f,c); // ctx includes feature, before and after
24 
25  if(c.size()>0 && context_set.find(c) != context_set.end()) return false; // already present
26  context_set.insert(c); // now we've seen it.
27  fcmap.insert(std::pair<std::string,context>(f,ctx));
28  //if(fcmap.size()%100==0) std::cerr << "fcmap size=" << fcmap.size() << "\n";
29  return true;
30 }
31 
32 /** returns 0 if success, -1 if fail. */
33 int word_and_context_list::readfile(const std::string &filename)
34 {
35  std::ifstream i(filename.c_str());
36  if(!i.is_open()) return -1;
37  printf("Reading context stop list %s\n",filename.c_str());
38  std::string line;
39  uint64_t total_context=0;
40  uint64_t line_counter = 0;
41  uint64_t features_read = 0;
42  while(getline(i,line)){
43  line_counter++;
44  if(line.size()==0) continue;
45  if(line[0]=='#') continue; // it's a comment
46  if((*line.end())=='\r'){
47  line.erase(line.end()); /* remove the last character if it is a \r */
48  }
49  if(line.size()==0) continue; // no line content
50  ++features_read;
51 
52  // If there are two tabs, this is a line from a feature file
53  size_t tab1 = line.find('\t');
54  if(tab1!=std::string::npos){
55  size_t tab2 = line.find('\t',tab1+1);
56  if(tab2!=std::string::npos){
57  size_t tab3 = line.find('\t',tab2+1);
58  if(tab3==std::string::npos) tab3=line.size();
59  std::string f = line.substr(tab1+1,(tab2-1)-tab1);
60  std::string c = line.substr(tab2+1,(tab3-1)-tab2);
61  if(add_fc(f,c)){
62  ++total_context;
63  }
64  } else {
65  std::string f = line.substr(tab1+1);
66  add_fc(f,""); // Insert a feature with no context
67  }
68  continue;
69  }
70 
71  // If there is no tab, then this must be a simple item to ignore.
72  // If it is a regular expression, add it to the list of REs
73  if(beregex::is_regex(line)){
74  patterns.push_back(new beregex(line,REG_ICASE));
75  } else {
76  // Otherwise, add it as a feature with no context
77  fcmap.insert(std::pair<std::string,context>(line,context(line)));
78  }
79  }
80  std::cout << "Stop list read.\n";
81  std::cout << " Total features read: " << features_read << "\n";
82  std::cout << " List Size: " << fcmap.size() << "\n";
83  std::cout << " Context Strings: " << total_context << "\n";
84  std::cout << " Regular Expressions: " << patterns.size() << "\n";
85  return 0;
86 }
87 
88 /** check() is threadsafe. */
89 bool word_and_context_list::check(const std::string &probe,const std::string &before,const std::string &after) const
90 {
91  /* First check literals, because they are faster */
92  for(stopmap_t::const_iterator it =fcmap.find(probe);it!=fcmap.end();it++){
93  if((rstrcmp((*it).second.before,before)==0) &&
94  (rstrcmp((*it).second.after,after)==0) &&
95  ((*it).second.feature==probe)){
96  return true;
97  }
98  }
99 
100  /* Now check the patterns; do this second */
101  for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
102  if((*it)->search(probe,0,0,0)){
103  return true; // yep
104  }
105  }
106  return false;
107 };
108 
109 bool word_and_context_list::check_feature_context(const std::string &probe,const std::string &context) const
110 {
111  std::string before;
112  std::string after;
113  context::extract_before_after(probe,context,before,after);
114  return check(probe,before,after);
115 }
116 
118 {
119  std::cout << "dump context list:\n";
120  for(stopmap_t::const_iterator it =fcmap.begin();it!=fcmap.end();it++){
121  std::cout << (*it).first << " = " << (*it).second << "\n";
122  }
123  std::cout << "dump RE list:\n";
124  for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
125  std::cout << (*it)->pat << "\n";
126  }
127 }
128 
#ifdef STAND
/**
 * Stand-alone test driver, compiled only when STAND is defined.
 *
 * Every command-line argument is read as a list file with readfile();
 * a file that cannot be read aborts the program through err(). After all
 * files are read the combined list is dumped to stdout.
 */
int main(int argc,char **argv)
{
    std::cout << "testing contxt_list\n";
    // The declaration of cl is missing from the listing; it is restored here
    // because readfile() and dump() are called on it below.
    // NOTE(review): assumes word_and_context_list is default-constructible -- confirm.
    word_and_context_list cl;
    while(--argc){
        argv++;
        if(cl.readfile(*argv)){         // non-zero means the file could not be opened
            err(1,"Cannot read %s",*argv);
        }
    }
    cl.dump();
    // NOTE(review): exits with status 1 even after a successful run; kept as in
    // the original -- confirm whether 0 was intended.
    exit(1);
}
#endif
static bool is_regex(const std::string &str)
Definition: beregex.cpp:41
bool add_fc(const std::string &f, const std::string &c)
bool check(const std::string &probe, const std::string &before, const std::string &after) const
static void extract_before_after(const std::string &feature, const std::string &ctx, std::string &before, std::string &after)
void add_regex(const std::string &pat)
int readfile(const std::string &fname)
static int rstrcmp(const std::string &a, const std::string &b)
bool check_feature_context(const std::string &probe, const std::string &context) const
int c
Definition: tcpdemux.cpp:366
int main(int argc, char *argv[])
Definition: tcpflow.cpp:565
void err(int eval, const char *fmt,...)
Definition: utils.cpp:33