tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

beregex.cpp
Go to the documentation of this file.
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 
3 #include "config.h"
4 #include "beregex.h"
5 
6 #include <sys/types.h>
7 #include <inttypes.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 
11 
12 #if defined(HAVE_LIBTRE) && defined(HAVE_TRE_REGCOMP) && defined(HAVE_TRE_TRE_H)
13 #define REGCOMP tre_regcomp
14 #define REGFREE tre_regfree
15 #define REGEXEC tre_regexec
16 #define nreg (regex_t *)nreg_
17 #define HAVE_REGULAR_EXPRESSIONS
18 static const char *regex_version = "tre";
19 #endif
20 
21 /* use regcomp() if tre_regcomp() is not available */
22 #if defined(HAVE_REGCOMP) && !defined(HAVE_REGULAR_EXPRESSIONS)
23 #define REGCOMP regcomp
24 #define REGFREE regfree
25 #define REGEXEC regexec
26 #define nreg (regex_t *)nreg_
27 #define HAVE_REGULAR_EXPRESSIONS
28 static const char *regex_version = "system";
29 #endif
30 
31 #ifndef HAVE_REGULAR_EXPRESSIONS
32 #error bulk_extractor requires tre_regcomp or regcomp to run
33 #error download tre from "http://laurikari.net/tre/download/"
34 #endif
35 
36 const char *beregex::version(){return regex_version;}
37 
38 /* Only certain characters are assumed to be a regular expression. These characters are
39  * coincidently never in email addresses.
40  */
41 bool beregex::is_regex(const std::string &str)
42 {
43  for(std::string::const_iterator it = str.begin();it!=str.end();it++){
44  switch(*it){
45  case '*':
46  case '[':
47  case '(':
48  return true;
49  }
50  }
51  return false;
52 }
53 
54 beregex::beregex(const beregex &that):pat(that.pat),flags(that.flags),nreg_(0)
55 {
56  compile();
57 }
58 
59 beregex::beregex(std::string pat_,int flags_):pat(pat_),flags(flags_),nreg_(0)
60 {
61  compile();
62 }
63 
64 void beregex::compile() // compile the regex
65 {
66  if(pat.size()==0) return;
67  nreg_ = calloc(sizeof(regex_t),1);
68  if(REGCOMP(nreg,pat.c_str(),flags | REG_EXTENDED)!=0){
69  std::cerr << "regular expression compile error '" << pat << "' flags=" << flags << "\n";
70  exit(1);
71  }
72 }
74  if(nreg_){
75  REGFREE(nreg);
76  free(nreg_);
77  nreg_ = 0;
78  }
79 }
80 /**
81  * perform a search for a single hit. If there is a group and something is found,
82  * set *found to be what was found, *offset to be the starting offset, and *len to be
83  * the length. Note that this only handles a single group.
84  */
85 int beregex::search(const std::string &line,std::string *found,size_t *offset,size_t *len) const
86 {
87  static const int REGMAX=2;
88  regmatch_t pmatch[REGMAX];
89  if(!nreg_) return 0;
90  memset(pmatch,0,sizeof(pmatch));
91  int r = REGEXEC(nreg,line.c_str(),REGMAX,pmatch,0);
92  if(r==REG_NOMATCH) return 0;
93  if(r!=0) return 0; /* some kind of failure */
94  /* Make copies of the first group */
95  if(pmatch[1].rm_so != pmatch[1].rm_eo){
96  if(found) *found = line.substr(pmatch[1].rm_so,pmatch[1].rm_eo-pmatch[1].rm_so);
97  if(offset) *offset = pmatch[1].rm_so;
98  if(len) *len = pmatch[1].rm_eo-pmatch[1].rm_so;
99  }
100  return 1; /* success */
101 }
102 /** Perform a search with an array of strings. Return 0 if success, return code if fail.*/
103 
104 int beregex::search(const std::string &line,std::string *matches,int REGMAX) const {
105  if(!nreg) return 0;
106  regmatch_t *pmatch = (regmatch_t *)calloc(sizeof(regmatch_t),REGMAX+1);
107  int r = REGEXEC(nreg,line.c_str(),REGMAX+1,pmatch,0);
108  if(r==0){
109  for(int i=0;i<REGMAX;i++){
110  size_t start = pmatch[i+1].rm_so;
111  size_t len = pmatch[i+1].rm_eo-pmatch[i+1].rm_so;
112  matches[i] = line.substr(start,len);
113  }
114  }
115  free(pmatch);
116  return r;
117 }
118 
119 std::string beregex::search(const std::string &line) const
120 {
121  if(!nreg) return std::string();
122  regmatch_t pmatch[2];
123  memset(pmatch,0,sizeof(pmatch));
124  if(REGEXEC(nreg,line.c_str(),2,pmatch,0)==0){
125  size_t start = pmatch[1].rm_so;
126  size_t len = pmatch[1].rm_eo-pmatch[1].rm_so;
127  return line.substr(start,len);
128  }
129  else {
130  return std::string();
131  }
132 }
133 
134 int regex_list::readfile(std::string fname)
135 {
136  std::ifstream f(fname.c_str());
137  if(f.is_open()){
138  while(!f.eof()){
139  std::string line;
140  getline(f,line);
141  if(line.size()>0 && (*line.end())=='\r'){
142  line.erase(line.end()); /* remove the last character while it is a \n or \r */
143  }
144  patterns.push_back(new beregex(line,0));
145  }
146  f.close();
147  return 0;
148  }
149  return -1;
150 }
151 
152 void regex_list::add_regex(const std::string &pat)
153 {
154  patterns.push_back(new beregex(pat,0));
155 }
156 
157 
158 /* Find the FIRST match in buf */
159 bool regex_list::check(const std::string &buf,std::string *found, size_t *offset,size_t *len) const
160 {
161  /* Now check check pattern */
162  /* First check literals, because they are faster */
163  bool first = true;
164  bool fnd = false;
165  for(std::vector<beregex *>::const_iterator it=patterns.begin(); it != patterns.end(); it++){
166  std::string nfound;
167  size_t noffset=0;
168  size_t nlen=0;
169  if((*it)->search(buf,&nfound,&noffset,&nlen)){
170  if(first || noffset<*offset){
171  fnd = true;
172  *found = nfound;
173  *offset = noffset;
174  *len = nlen;
175  first = false;
176  }
177  }
178  }
179  return fnd;
180 }
181 
beregex(const beregex &that)
Definition: beregex.cpp:54
~beregex()
Definition: beregex.cpp:73
void * nreg_
Definition: beregex.h:44
static const char * version()
Definition: beregex.cpp:36
void compile()
Definition: beregex.cpp:64
static bool is_regex(const std::string &str)
Definition: beregex.cpp:41
int search(const std::string &line, std::string *found, size_t *offset, size_t *len) const
Definition: beregex.cpp:85
std::string pat
Definition: beregex.h:39
int readfile(std::string fname)
Definition: beregex.cpp:134
void add_regex(const std::string &pat)
Definition: beregex.cpp:152
bool check(const std::string &probe, std::string *found, size_t *offset, size_t *len) const
Definition: beregex.cpp:159
std::vector< beregex * > patterns
Definition: beregex.h:67
flags
Definition: http_parser.h:216