"Fossies" - the Fresh Open Source Software Archive 
Member "tcpflow-1.6.1/src/be13_api/feature_recorder_sql.cpp" (19 Feb 2021, 13079 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "feature_recorder_sql.cpp" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
1.4.5_vs_1.5.0.
1 /*
2 * Feature recorder mods for writing features into an SQLite3 database.
3 */
4
5 /* http://blog.quibb.org/2010/08/fast-bulk-inserts-into-sqlite/ */
6
7 #include "config.h"
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <sbuf.h>
14
15 #include "bulk_extractor_i.h"
16 #include "histogram.h"
17
18 /*
19 * Time results with ubnist1 on R4:
20 * no SQL - 79 seconds
21 * no pragmas - 651 seconds
22 * "PRAGMA synchronous = OFF", - 146 seconds
23 * "PRAGMA synchronous = OFF", "PRAGMA journal_mode=MEMORY", - 79 seconds
24 *
25 * Time with domexusers:
26 * no SQL -
27 */
28
29
30 #if defined(HAVE_LIBSQLITE3) && defined(HAVE_SQLITE3_H)
31 #define USE_SQLITE3
32 #endif
33 #define SQLITE_EXTENSION ".sqlite"
34
35 #ifndef SQLITE_DETERMINISTIC
36 #define SQLITE_DETERMINISTIC 0
37 #endif
38
/* File-wide debug switch: when nonzero, the functions in this file trace
 * the SQL they execute and their database open/close activity to std::cerr. */
static int debug = 0;
40
41 #ifdef USE_SQLITE3
/*
 * Statements executed by db_create() on the freshly opened report database.
 * The list is NULL-terminated because db_send_sql() iterates until it sees
 * a null entry.
 */
static const char *schema_db[] = {
    /* Performance tuning; db_send_sql() does not treat a failing PRAGMA as fatal. */
    "PRAGMA synchronous = OFF",
    "PRAGMA journal_mode=MEMORY",
    //"PRAGMA temp_store=MEMORY", // did not improve performance
    "PRAGMA cache_size = 200000",

    /* Bookkeeping tables */
    "CREATE TABLE IF NOT EXISTS db_info (schema_ver INTEGER, bulk_extractor_ver INTEGER)",
    "INSERT INTO db_info (schema_ver, bulk_extractor_ver) VALUES (1,1)",
    "CREATE TABLE IF NOT EXISTS be_features (tablename VARCHAR,comment TEXT)",
    "CREATE TABLE IF NOT EXISTS be_config (name VARCHAR,value VARCHAR)",

    NULL
};
52
/*
 * Per-feature schema: creates the feature table f_<name> with its three
 * indexes and records the table in be_features.
 * Every "%s" is the feature name; db_create_table() passes the name twice
 * because no statement needs more than two substitutions.
 */
static const char *schema_tbl[] = {
    "CREATE TABLE IF NOT EXISTS f_%s (offset INTEGER(12), path VARCHAR, feature_eutf8 TEXT, feature_utf8 TEXT, context_eutf8 TEXT)",
    "CREATE INDEX IF NOT EXISTS f_%s_idx1 ON f_%s(offset)",
    "CREATE INDEX IF NOT EXISTS f_%s_idx2 ON f_%s(feature_eutf8)",
    "CREATE INDEX IF NOT EXISTS f_%s_idx3 ON f_%s(feature_utf8)",
    "INSERT INTO be_features (tablename,comment) VALUES ('f_%s','')",
    NULL
};
61
/*
 * Histogram table h_<name> plus its two indexes.
 * There is deliberately no IF NOT EXISTS here, so these statements fail
 * when the histogram table is already present.
 */
static const char *schema_hist[] = {
    "CREATE TABLE h_%s (count INTEGER(12), feature_utf8 TEXT)",
    "CREATE INDEX h_%s_idx1 ON h_%s(count)",
    "CREATE INDEX h_%s_idx2 ON h_%s(feature_utf8)",
    NULL
};
68
/*
 * Fills h_<name> (first %s) with one row per distinct feature_utf8 value
 * of f_<name> (second %s) together with its occurrence count.
 */
static const char *schema_hist1[] = {
    "INSERT INTO h_%s select COUNT(*),feature_utf8 from f_%s GROUP BY feature_utf8",
    NULL
};
73
74 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
/*
 * Derives a summarized histogram: rows of the source histogram (second %s)
 * are mapped through the SQL function BEHIST, rows mapping to '' are dropped,
 * and the counts are summed per mapped value into the target (first %s).
 */
static const char *schema_hist2[] = {
    "INSERT INTO h_%s select sum(count),BEHIST(feature_utf8) from h_%s where BEHIST(feature_utf8)!='' GROUP BY BEHIST(feature_utf8)",
    NULL
};
78 #endif
79
80 #endif
81 const char *feature_recorder::db_insert_stmt = "INSERT INTO f_%s (offset,path,feature_eutf8,feature_utf8,context_eutf8) VALUES (?1, ?2, ?3, ?4, ?5)";
/* NULL-terminated one-statement lists handed to db_send_sql() to open and close a transaction. */
static const char *begin_transaction[] = {
    "BEGIN TRANSACTION",
    NULL
};
static const char *commit_transaction[] = {
    "COMMIT TRANSACTION",
    NULL
};
84 void feature_recorder::besql_stmt::insert_feature(const pos0_t &pos,
85 const std::string &feature,
86 const std::string &feature8, const std::string &context)
87 {
88 #ifdef USE_SQLITE3
89 assert(stmt!=0);
90 cppmutex::lock lock(Mstmt); // grab a lock
91 const std::string &path = pos.str();
92 sqlite3_bind_int64(stmt, 1, pos.imageOffset()); // offset
93 sqlite3_bind_text(stmt, 2, path.data(), path.size(), SQLITE_STATIC); // path
94 sqlite3_bind_text(stmt, 3, feature.data(), feature.size(), SQLITE_STATIC);
95 sqlite3_bind_text(stmt, 4, feature8.data(), feature8.size(), SQLITE_STATIC);
96 sqlite3_bind_text(stmt, 5, context.data(), context.size(), SQLITE_STATIC);
97 if (sqlite3_step(stmt) != SQLITE_DONE) {
98 fprintf(stderr,"sqlite3_step failed\n");
99 }
100 sqlite3_reset(stmt);
101 #endif
102 };
103
104 feature_recorder::besql_stmt::besql_stmt(BEAPI_SQLITE3 *db3,const char *sql):Mstmt(),stmt()
105 {
106 #ifdef USE_SQLITE3
107 assert(db3!=0);
108 assert(sql!=0);
109 sqlite3_prepare_v2(db3,sql, strlen(sql), &stmt, NULL);
110 assert(stmt!=0);
111 #endif
112 }
113
114 feature_recorder::besql_stmt::~besql_stmt()
115 {
116 #ifdef USE_SQLITE3
117 assert(stmt!=0);
118 sqlite3_finalize(stmt);
119 stmt = 0;
120 #endif
121 }
122
123 void feature_recorder_set::db_send_sql(BEAPI_SQLITE3 *db,const char **stmts, ...)
124 {
125 #ifdef USE_SQLITE3
126 assert(db!=0);
127 for(int i=0;stmts[i];i++){
128 char *errmsg = 0;
129 char buf[65536];
130
131 va_list ap;
132 va_start(ap,stmts);
133 vsnprintf(buf,sizeof(buf),stmts[i],ap);
134 va_end(ap);
135 if(debug) std::cerr << "SQL: " << buf << "\n";
136 // Don't error on a PRAGMA
137 if((sqlite3_exec(db,buf,NULL,NULL,&errmsg) != SQLITE_OK) && (strncmp(buf,"PRAGMA",6)!=0)) {
138 fprintf(stderr,"Error executing '%s' : %s\n",buf,errmsg);
139 exit(1);
140 }
141 }
142 #endif
143 }
144
145 void feature_recorder_set::db_create_table(const std::string &name)
146 {
147 #ifdef USE_SQLITE3
148 assert(name.size()>0);
149 db_send_sql(db3,schema_tbl,name.c_str(),name.c_str());
150 #endif
151 }
152
153 BEAPI_SQLITE3 *feature_recorder_set::db_create_empty(const std::string &name)
154 {
155 #ifdef USE_SQLITE3
156 assert(name.size()>0);
157 std::string dbfname = outdir + "/" + name + SQLITE_EXTENSION;
158 if(debug) std::cerr << "create_feature_database " << dbfname << "\n";
159 BEAPI_SQLITE3 *db=0;
160 if (sqlite3_open_v2(dbfname.c_str(), &db,
161 SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_FULLMUTEX,
162 0)!=SQLITE_OK) {
163 std::cerr << "Cannot create database '" << dbfname << "': " << sqlite3_errmsg(db) << "\n";
164 sqlite3_close(db);
165 exit(1);
166 }
167 return db;
168 #else
169 return 0;
170 #endif
171 }
172
173 #pragma GCC diagnostic ignored "-Wmissing-noreturn"
174 void feature_recorder_set::db_create()
175 {
176 #ifdef USE_SQLITE3
177 assert(db3==0);
178 db3 = db_create_empty("report");
179 db_send_sql(db3,schema_db);
180 #else
181 std::cerr << "*** CANNOT CREATE SQLITE3 DATABASE ***\n";
182 std::cerr << "*** Compiled without libsqlite ***\n";
183 assert(0 && debug); // prevent debug from being not used
184 #endif
185 }
186
187 void feature_recorder_set::db_close()
188 {
189 #ifdef USE_SQLITE3
190 if(db3){
191 if(debug) std::cerr << "db_close()\n";
192 sqlite3_close(db3);
193 db3 = 0;
194 }
195 #endif
196 }
197
198 void feature_recorder_set::db_transaction_begin()
199 {
200 cppmutex::lock lock(Min_transaction);
201 if(!in_transaction){
202 db_send_sql(db3,begin_transaction);
203 in_transaction = true;
204 }
205 }
206
207 void feature_recorder_set::db_transaction_commit()
208 {
209 cppmutex::lock lock(Min_transaction);
210 if(in_transaction){
211 db_send_sql(db3,commit_transaction);
212 in_transaction = false;
213 } else {
214 std::cerr << "No transaction to commit\n";
215 }
216 }
217
218 /* Hook for writing feature to SQLite3 database */
219 void feature_recorder::db_write0(const pos0_t &pos0,const std::string &feature,const std::string &context)
220 {
221 /**
222 * Note: this is not very efficient, passing through a quoted feature and then unquoting it.
223 * We could make this more efficient.
224 */
225 std::string *feature8 = HistogramMaker::convert_utf16_to_utf8(feature_recorder::unquote_string(feature));
226 assert(bs!=0);
227 bs->insert_feature(pos0,feature,
228 feature8 ? *feature8 : feature,
229 flag_set(feature_recorder::FLAG_NO_CONTEXT) ? "" : context);
230 if (feature8) delete feature8;
231 }
232
233 /* Hook for writing histogram
234 */
235 #ifdef USE_SQLITE3
/*
 * sqlite3_exec() row callback that only counts rows.
 * `param` points at an int which is incremented once per invocation; the
 * column arguments are ignored. Always returns 0.
 */
static int callback_counter(void *param, int argc, char **argv, char **azColName)
{
    (void)argc;
    (void)argv;
    (void)azColName;
    int &rows_seen = *static_cast<int *>(param);
    ++rows_seen;
    return 0;
}
242
243 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
244 static void behist(sqlite3_context *ctx,int argc,sqlite3_value**argv)
245 {
246 const histogram_def *def = reinterpret_cast<const histogram_def *>(sqlite3_user_data(ctx));
247 if(debug) std::cerr << "behist feature=" << def->feature << " suffix="
248 << def->suffix << " argc=" << argc << "value = " << sqlite3_value_text(argv[0]) << "\n";
249 std::string new_feature(reinterpret_cast<const char *>(sqlite3_value_text(argv[0])));
250 if (def->reg.search(new_feature,&new_feature,0,0)) {
251 sqlite3_result_text(ctx,new_feature.c_str(),new_feature.size(),SQLITE_TRANSIENT);
252 }
253 }
254 #endif
255 #endif
256
257 void feature_recorder::dump_histogram_db(const histogram_def &def,void *user,feature_recorder::dump_callback_t cb) const
258 {
259 #ifdef USE_SQLITE3
260 /* First check to see if there exists a feature histogram summary. If not, make it */
261 std::string query = "SELECT name FROM sqlite_master WHERE type='table' AND name='h_" + def.feature +"'";
262 char *errmsg=0;
263 int rowcount=0;
264 if (sqlite3_exec(fs.db3,query.c_str(),callback_counter,&rowcount,&errmsg)){
265 std::cerr << "sqlite3: " << errmsg << "\n";
266 return;
267 }
268 if (rowcount==0){
269 const char *feature = def.feature.c_str();
270 fs.db_send_sql(fs.db3,schema_hist, feature, feature); // creates the histogram
271 fs.db_send_sql(fs.db3,schema_hist1, feature, feature); // creates the histogram
272 }
273 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
274 /* Now create the summarized histogram for the regex, if it is not existing, but only if we have
275 * sqlite3_create_function_v2
276 */
277 if (def.pattern.size()>0){
278 /* Create the database where we will add the histogram */
279 std::string hname = def.feature + "_" + def.suffix;
280
281 /* Remove any "-" characters if present */
282 for(size_t i=0;i<hname.size();i++){
283 if (hname[i]=='-') hname[i]='_';
284 }
285
286 if(debug) std::cerr << "CREATING TABLE = " << hname << "\n";
287 if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
288 (void *)&def,behist,0,0,0)) {
289 std::cerr << "could not register function BEHIST\n";
290 return;
291 }
292 const char *fn = def.feature.c_str();
293 const char *hn = hname.c_str();
294 fs.db_send_sql(fs.db3,schema_hist, hn , hn); // create the table
295 fs.db_send_sql(fs.db3,schema_hist2, hn , fn); // select into it from a function of the old histogram table
296
297 /* erase the user defined function */
298 if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
299 (void *)&def,0,0,0,0)) {
300 std::cerr << "could not remove function BEHIST\n";
301 return;
302 }
303 }
304 #endif
305 #endif
306 }
307
308 #ifdef STAND
309 static std::string hash_name("md5");
310 static std::string hash_func(const uint8_t *buf,size_t bufsize)
311 {
312 if(hash_name=="md5" || hash_name=="MD5"){
313 return md5_generator::hash_buf(buf,bufsize).hexdigest();
314 }
315 if(hash_name=="sha1" || hash_name=="SHA1" || hash_name=="sha-1" || hash_name=="SHA-1"){
316 return sha1_generator::hash_buf(buf,bufsize).hexdigest();
317 }
318 if(hash_name=="sha256" || hash_name=="SHA256" || hash_name=="sha-256" || hash_name=="SHA-256"){
319 return sha256_generator::hash_buf(buf,bufsize).hexdigest();
320 }
321 std::cerr << "Invalid hash name: " << hash_name << "\n";
322 std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n";
323 exit(1);
324 }
325 static feature_recorder_set::hash_def my_hasher(hash_name,hash_func);
326
327 feature_recorder_set::feature_recorder_set(uint32_t flags_,const feature_recorder_set::hash_def &hasher_):
328 flags(flags_),seen_set(),input_fname(),
329 outdir(),
330 frm(),
331 histogram_defs(),
332 db3(),
333 alert_list(),stop_list(),
334 scanner_stats(),hasher(hasher_)
335 {
336 }
337
338 feature_recorder *feature_recorder_set::create_name_factory(const std::string &name_){return 0;}
339 void feature_recorder_set::create_name(const std::string &name,bool create_stop_also){}
340 bool feature_recorder_set::check_previously_processed(const uint8_t *buf,size_t bufsize){return 0;}
341 feature_recorder *feature_recorder_set::get_name(const std::string &name) const{return 0;}
342 feature_recorder *feature_recorder_set::get_alert_recorder() const{return 0;}
343 void feature_recorder_set::get_feature_file_list(std::vector<std::string> &ret){}
344
345 int main(int argc,char **argv)
346 {
347 const char *dbfile = "test.sql3";
348 char *errmsg = 0;
349 sqlite3 *db=0;
350
351 feature_recorder_set fs(0,my_hasher);
352
353 unlink(dbfile);
354 fs.db_create();
355 if(1){
356 /* Create an email table */
357 fs.db_create_table("email");
358
359 /* Lets throw a million features into the table as a test */
360 //sqlite3_exec(db,"BEGIN TRANSACTION",NULL,NULL,&errmsg);
361 beapi_sql_stmt s(db,"email");
362 for(int i=0;i<1000000;i++){
363 pos0_t p;
364 pos0_t p1 = p+i;
365
366 if(i%10000==0) printf("i=%d\n",i);
367
368 char feature[64];
369 snprintf(feature,sizeof(feature),"user%d@company.com",i);
370 char context[64];
371 snprintf(context,sizeof(context),"this is the context user%d@company.com yes it is!",i);
372 //insert_statement(stmt,p1,feature,context);
373 }
374 //sqlite3_exec(db,"COMMIT TRANSACTION",NULL,NULL,&errmsg);
375 }
376 fs.db_close();
377 }
378 #endif
379