"Fossies" - the Fresh Open Source Software Archive 
Member "tcpflow-1.6.1/src/be13_api/beregex.cpp" (19 Feb 2021, 5347 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "beregex.cpp" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
1.4.5_vs_1.5.0.
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 #include "config.h"
4 #include "beregex.h"
5
6 #include <sys/types.h>
7 #include <inttypes.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10
11
12 #if defined(HAVE_LIBTRE) && defined(HAVE_TRE_REGCOMP) && defined(HAVE_TRE_TRE_H)
13 #define REGCOMP tre_regcomp
14 #define REGFREE tre_regfree
15 #define REGEXEC tre_regexec
16 #define nreg (regex_t *)nreg_
17 #define HAVE_REGULAR_EXPRESSIONS
18 static const char *regex_version = "tre";
19 #endif
20
21 /* use regcomp() if tre_regcomp() is not available */
22 #if defined(HAVE_REGCOMP) && !defined(HAVE_REGULAR_EXPRESSIONS)
23 #define REGCOMP regcomp
24 #define REGFREE regfree
25 #define REGEXEC regexec
26 #define nreg (regex_t *)nreg_
27 #define HAVE_REGULAR_EXPRESSIONS
28 static const char *regex_version = "system";
29 #endif
30
31 #ifndef HAVE_REGULAR_EXPRESSIONS
32 #error bulk_extractor requires tre_regcomp or regcomp to run
33 #error download tre from "http://laurikari.net/tre/download/"
34 #endif
35
36 const char *beregex::version(){return regex_version;}
37
38 /* Only certain characters are assumed to be a regular expression. These characters are
39 * coincidently never in email addresses.
40 */
41 bool beregex::is_regex(const std::string &str)
42 {
43 for(std::string::const_iterator it = str.begin();it!=str.end();it++){
44 switch(*it){
45 case '*':
46 case '[':
47 case '(':
48 return true;
49 }
50 }
51 return false;
52 }
53
54 beregex::beregex(const beregex &that):pat(that.pat),flags(that.flags),nreg_(0)
55 {
56 compile();
57 }
58
59 beregex::beregex(std::string pat_,int flags_):pat(pat_),flags(flags_),nreg_(0)
60 {
61 compile();
62 }
63
64 void beregex::compile() // compile the regex
65 {
66 if(pat.size()==0) return;
67 nreg_ = calloc(sizeof(regex_t),1);
68 if(REGCOMP(nreg,pat.c_str(),flags | REG_EXTENDED)!=0){
69 std::cerr << "regular expression compile error '" << pat << "' flags=" << flags << "\n";
70 exit(1);
71 }
72 }
73 beregex::~beregex(){
74 if(nreg_){
75 REGFREE(nreg);
76 free(nreg_);
77 nreg_ = 0;
78 }
79 }
80 /**
81 * perform a search for a single hit. If there is a group and something is found,
82 * set *found to be what was found, *offset to be the starting offset, and *len to be
83 * the length. Note that this only handles a single group.
84 */
85 int beregex::search(const std::string &line,std::string *found,size_t *offset,size_t *len) const
86 {
87 static const int REGMAX=2;
88 regmatch_t pmatch[REGMAX];
89 if(!nreg_) return 0;
90 memset(pmatch,0,sizeof(pmatch));
91 int r = REGEXEC(nreg,line.c_str(),REGMAX,pmatch,0);
92 if(r==REG_NOMATCH) return 0;
93 if(r!=0) return 0; /* some kind of failure */
94 /* Make copies of the first group */
95 if(pmatch[1].rm_so != pmatch[1].rm_eo){
96 if(found) *found = line.substr(pmatch[1].rm_so,pmatch[1].rm_eo-pmatch[1].rm_so);
97 if(offset) *offset = pmatch[1].rm_so;
98 if(len) *len = pmatch[1].rm_eo-pmatch[1].rm_so;
99 }
100 return 1; /* success */
101 }
102 /** Perform a search with an array of strings. Return 0 if success, return code if fail.*/
103
104 int beregex::search(const std::string &line,std::string *matches,int REGMAX) const {
105 if(!nreg) return 0;
106 regmatch_t *pmatch = (regmatch_t *)calloc(sizeof(regmatch_t),REGMAX+1);
107 int r = REGEXEC(nreg,line.c_str(),REGMAX+1,pmatch,0);
108 if(r==0){
109 for(int i=0;i<REGMAX;i++){
110 size_t start = pmatch[i+1].rm_so;
111 size_t len = pmatch[i+1].rm_eo-pmatch[i+1].rm_so;
112 matches[i] = line.substr(start,len);
113 }
114 }
115 free(pmatch);
116 return r;
117 }
118
119 std::string beregex::search(const std::string &line) const
120 {
121 if(!nreg) return std::string();
122 regmatch_t pmatch[2];
123 memset(pmatch,0,sizeof(pmatch));
124 if(REGEXEC(nreg,line.c_str(),2,pmatch,0)==0){
125 size_t start = pmatch[1].rm_so;
126 size_t len = pmatch[1].rm_eo-pmatch[1].rm_so;
127 return line.substr(start,len);
128 }
129 else {
130 return std::string();
131 }
132 }
133
134 int regex_list::readfile(std::string fname)
135 {
136 std::ifstream f(fname.c_str());
137 if(f.is_open()){
138 while(!f.eof()){
139 std::string line;
140 getline(f,line);
141 if(line.size()>0 && (*line.end())=='\r'){
142 line.erase(line.end()); /* remove the last character while it is a \n or \r */
143 }
144 patterns.push_back(new beregex(line,0));
145 }
146 f.close();
147 return 0;
148 }
149 return -1;
150 }
151
152 void regex_list::add_regex(const std::string &pat)
153 {
154 patterns.push_back(new beregex(pat,0));
155 }
156
157
158 /* Find the FIRST match in buf */
159 bool regex_list::check(const std::string &buf,std::string *found, size_t *offset,size_t *len) const
160 {
161 /* Now check check pattern */
162 /* First check literals, because they are faster */
163 bool first = true;
164 bool fnd = false;
165 for(std::vector<beregex *>::const_iterator it=patterns.begin(); it != patterns.end(); it++){
166 std::string nfound;
167 size_t noffset=0;
168 size_t nlen=0;
169 if((*it)->search(buf,&nfound,&noffset,&nlen)){
170 if(first || noffset<*offset){
171 fnd = true;
172 *found = nfound;
173 *offset = noffset;
174 *len = nlen;
175 first = false;
176 }
177 }
178 }
179 return fnd;
180 }
181