"Fossies" - the Fresh Open Source Software Archive 
Member "tcpflow-1.6.1/src/be13_api/word_and_context_list.cpp" (19 Feb 2021, 4442 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard), with prefixed line numbers and
a code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "word_and_context_list.cpp" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
1.4.5_vs_1.5.0.
1 /**
2 * class word_and_context_list reads from disk and maintains in memory
3 * a data structure that is used for the stop list and alert list.
4 */
5
6 #include "config.h"
7 #include <sys/types.h>
8 #include <inttypes.h>
9 #include "word_and_context_list.h"
10 #include "beregex.h"
11
12 void word_and_context_list::add_regex(const std::string &pat)
13 {
14 patterns.push_back(new beregex(pat,0));
15 }
16
17 /**
18 * Insert a feature and context, but only if not already present.
19 * Returns true if added.
20 */
21 bool word_and_context_list::add_fc(const std::string &f,const std::string &c)
22 {
23 context ctx(f,c); // ctx includes feature, before and after
24
25 if(c.size()>0 && context_set.find(c) != context_set.end()) return false; // already present
26 context_set.insert(c); // now we've seen it.
27 fcmap.insert(std::pair<std::string,context>(f,ctx));
28 //if(fcmap.size()%100==0) std::cerr << "fcmap size=" << fcmap.size() << "\n";
29 return true;
30 }
31
32 /** returns 0 if success, -1 if fail. */
33 int word_and_context_list::readfile(const std::string &filename)
34 {
35 std::ifstream i(filename.c_str());
36 if(!i.is_open()) return -1;
37 printf("Reading context stop list %s\n",filename.c_str());
38 std::string line;
39 uint64_t total_context=0;
40 uint64_t line_counter = 0;
41 uint64_t features_read = 0;
42 while(getline(i,line)){
43 line_counter++;
44 if(line.size()==0) continue;
45 if(line[0]=='#') continue; // it's a comment
46 if((*line.end())=='\r'){
47 line.erase(line.end()); /* remove the last character if it is a \r */
48 }
49 if(line.size()==0) continue; // no line content
50 ++features_read;
51
52 // If there are two tabs, this is a line from a feature file
53 size_t tab1 = line.find('\t');
54 if(tab1!=std::string::npos){
55 size_t tab2 = line.find('\t',tab1+1);
56 if(tab2!=std::string::npos){
57 size_t tab3 = line.find('\t',tab2+1);
58 if(tab3==std::string::npos) tab3=line.size();
59 std::string f = line.substr(tab1+1,(tab2-1)-tab1);
60 std::string c = line.substr(tab2+1,(tab3-1)-tab2);
61 if(add_fc(f,c)){
62 ++total_context;
63 }
64 } else {
65 std::string f = line.substr(tab1+1);
66 add_fc(f,""); // Insert a feature with no context
67 }
68 continue;
69 }
70
71 // If there is no tab, then this must be a simple item to ignore.
72 // If it is a regular expression, add it to the list of REs
73 if(beregex::is_regex(line)){
74 patterns.push_back(new beregex(line,REG_ICASE));
75 } else {
76 // Otherwise, add it as a feature with no context
77 fcmap.insert(std::pair<std::string,context>(line,context(line)));
78 }
79 }
80 std::cout << "Stop list read.\n";
81 std::cout << " Total features read: " << features_read << "\n";
82 std::cout << " List Size: " << fcmap.size() << "\n";
83 std::cout << " Context Strings: " << total_context << "\n";
84 std::cout << " Regular Expressions: " << patterns.size() << "\n";
85 return 0;
86 }
87
88 /** check() is threadsafe. */
89 bool word_and_context_list::check(const std::string &probe,const std::string &before,const std::string &after) const
90 {
91 /* First check literals, because they are faster */
92 for(stopmap_t::const_iterator it =fcmap.find(probe);it!=fcmap.end();it++){
93 if((rstrcmp((*it).second.before,before)==0) &&
94 (rstrcmp((*it).second.after,after)==0) &&
95 ((*it).second.feature==probe)){
96 return true;
97 }
98 }
99
100 /* Now check the patterns; do this second */
101 for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
102 if((*it)->search(probe,0,0,0)){
103 return true; // yep
104 }
105 }
106 return false;
107 };
108
109 bool word_and_context_list::check_feature_context(const std::string &probe,const std::string &context) const
110 {
111 std::string before;
112 std::string after;
113 context::extract_before_after(probe,context,before,after);
114 return check(probe,before,after);
115 }
116
117 void word_and_context_list::dump()
118 {
119 std::cout << "dump context list:\n";
120 for(stopmap_t::const_iterator it =fcmap.begin();it!=fcmap.end();it++){
121 std::cout << (*it).first << " = " << (*it).second << "\n";
122 }
123 std::cout << "dump RE list:\n";
124 for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){
125 std::cout << (*it)->pat << "\n";
126 }
127 }
128
#ifdef STAND
/**
 * Stand-alone test driver, compiled only when STAND is defined.
 *
 * Reads every file named on the command line into one
 * word_and_context_list, then dumps the list to stdout. If a file
 * cannot be read, err() is called with exit code 1 (presumably the
 * BSD <err.h> routine, which prints the message and terminates).
 *
 * @return 0 once all files have been read and the list dumped.
 */
int main(int argc,char **argv)
{
    std::cout << "testing contxt_list\n";
    word_and_context_list cl;
    // Start at 1 to skip the program name; an indexed loop is also
    // well-behaved if argc happens to be 0.
    for(int i = 1; i < argc; ++i){
        if(cl.readfile(argv[i])){
            err(1,"Cannot read %s",argv[i]);
        }
    }
    cl.dump();
    return 0;                           // success: do not report a failure status
}
#endif