tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

feature_recorder_sql.cpp
Go to the documentation of this file.
1 /*
2  * Feature recorder mods for writing features into an SQLite3 database.
3  */
4 
5 /* http://blog.quibb.org/2010/08/fast-bulk-inserts-into-sqlite/ */
6 
7 #include "config.h"
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <sbuf.h>
14 
15 #include "bulk_extractor_i.h"
16 #include "histogram.h"
17 
18 /*
19  * Time results with ubnist1 on R4:
20  * no SQL - 79 seconds
21  * no pragmas - 651 seconds
22  * "PRAGMA synchronous = OFF", - 146 seconds
23  * "PRAGMA synchronous = OFF", "PRAGMA journal_mode=MEMORY", - 79 seconds
24  *
25  * Time with domexusers:
26  * no SQL -
27  */
28 
29 
30 #if defined(HAVE_LIBSQLITE3) && defined(HAVE_SQLITE3_H)
31 #define USE_SQLITE3
32 #endif
33 #define SQLITE_EXTENSION ".sqlite"
34 
35 #ifndef SQLITE_DETERMINISTIC
36 #define SQLITE_DETERMINISTIC 0
37 #endif
38 
39 static int debug = 0;
40 
41 #ifdef USE_SQLITE3
/*
 * Statements sent to a newly opened report database (this array is handed to
 * db_send_sql() right after db_create_empty("report") returns).
 * The trailing 0 is the sentinel that ends db_send_sql()'s loop.
 * None of these statements contains a printf conversion, so no variadic
 * arguments are needed when it is sent.
 */
42 static const char *schema_db[] = {
 /* The two PRAGMAs below are the ones measured in the timing table above
  * (651s with no pragmas, 146s with synchronous=OFF, 79s with both). */
43  "PRAGMA synchronous = OFF",
44  "PRAGMA journal_mode=MEMORY",
45  //"PRAGMA temp_store=MEMORY", // did not improve performance
46  "PRAGMA cache_size = 200000",
 /* Bookkeeping tables. NOTE(review): the INSERT is unconditional, so sending
  * this schema to an existing database adds another (1,1) row to db_info. */
47  "CREATE TABLE IF NOT EXISTS db_info (schema_ver INTEGER, bulk_extractor_ver INTEGER)",
48  "INSERT INTO db_info (schema_ver, bulk_extractor_ver) VALUES (1,1)",
49  "CREATE TABLE IF NOT EXISTS be_features (tablename VARCHAR,comment TEXT)",
50  "CREATE TABLE IF NOT EXISTS be_config (name VARCHAR,value VARCHAR)",
51  0};
52 
53 /* Create a feature table and note that it has been created in be_features */
/*
 * Each statement is a printf-style template: db_send_sql() expands every %s
 * with the variadic arguments it was given. No statement uses more than two
 * %s conversions, and both are meant to receive the same feature name, which
 * yields the table f_<name> and its three indexes.
 * The array is terminated by 0.
 * NOTE(review): the final INSERT has no uniqueness guard, so sending this
 * schema twice for the same name records the table twice in be_features.
 */
54 static const char *schema_tbl[] = {
55  "CREATE TABLE IF NOT EXISTS f_%s (offset INTEGER(12), path VARCHAR, feature_eutf8 TEXT, feature_utf8 TEXT, context_eutf8 TEXT)",
56  "CREATE INDEX IF NOT EXISTS f_%s_idx1 ON f_%s(offset)",
57  "CREATE INDEX IF NOT EXISTS f_%s_idx2 ON f_%s(feature_eutf8)",
58  "CREATE INDEX IF NOT EXISTS f_%s_idx3 ON f_%s(feature_utf8)",
59  "INSERT INTO be_features (tablename,comment) VALUES ('f_%s','')",
60  0};
61 
62 /* This creates the base histogram. Note that the SQL fails if the histogram exists */
/*
 * printf-style templates for an (initially empty) histogram table h_<name>
 * with indexes on both of its columns. Each statement takes at most two %s
 * conversions, both intended to be the histogram name. 0-terminated.
 * Because there is no IF NOT EXISTS here and db_send_sql() exits on any
 * non-PRAGMA error, callers must check that the table is absent first.
 */
63 static const char *schema_hist[] = {
64  "CREATE TABLE h_%s (count INTEGER(12), feature_utf8 TEXT)",
65  "CREATE INDEX h_%s_idx1 ON h_%s(count)",
66  "CREATE INDEX h_%s_idx2 ON h_%s(feature_utf8)",
67  0};
68 
69 /* This performs the histogram operation */
/*
 * Fills h_<first %s> with one row per distinct feature_utf8 value found in
 * f_<second %s>, together with the number of times it occurs.
 * 0-terminated single-statement list for db_send_sql().
 */
70 static const char *schema_hist1[] = {
71  "INSERT INTO h_%s select COUNT(*),feature_utf8 from f_%s GROUP BY feature_utf8",
72  0};
73 
/*
 * Derives a summarized histogram from an existing one: rows of
 * h_<second %s> are regrouped by BEHIST(feature_utf8) and their counts are
 * summed into h_<first %s>. Rows for which BEHIST() yields '' are excluded.
 * BEHIST is an application-defined SQL function, so this is only compiled
 * when sqlite3_create_function_v2() is available.
 */
74 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
75 static const char *schema_hist2[] = {
76  "INSERT INTO h_%s select sum(count),BEHIST(feature_utf8) from h_%s where BEHIST(feature_utf8)!='' GROUP BY BEHIST(feature_utf8)",
77  0};
78 #endif
79 
80 #endif
/*
 * Template for the per-feature INSERT. The %s is a printf conversion for the
 * feature table name; ?1..?5 are SQLite bind parameters in the order
 * offset, path, feature_eutf8, feature_utf8, context_eutf8.
 */
81 const char *feature_recorder::db_insert_stmt = "INSERT INTO f_%s (offset,path,feature_eutf8,feature_utf8,context_eutf8) VALUES (?1, ?2, ?3, ?4, ?5)";
/* Single-statement, 0-terminated lists so they can be passed to db_send_sql(). */
82 static const char *begin_transaction[] = {"BEGIN TRANSACTION",0};
83 static const char *commit_transaction[] = {"COMMIT TRANSACTION",0};
85  const std::string &feature,
86  const std::string &feature8, const std::string &context)
87 {
 /*
  * Binds one feature record to the prepared statement `stmt`, executes it
  * and resets the statement for the next call.
  * Bind positions 1..5 are: image offset, path string, feature,
  * feature8 and context.
  * A failing sqlite3_step() is only reported on stderr; it is not fatal.
  * Without USE_SQLITE3 the function does nothing.
  */
88 #ifdef USE_SQLITE3
89  assert(stmt!=0);
90  cppmutex::lock lock(Mstmt); // take Mstmt before touching the shared stmt (presumably held until return - confirm cppmutex::lock semantics)
91  const std::string &path = pos.str();
92  sqlite3_bind_int64(stmt, 1, pos.imageOffset()); // offset
 /* SQLITE_STATIC: SQLite does not copy the text, so path/feature/feature8/
  * context must stay alive until the step below has run. */
93  sqlite3_bind_text(stmt, 2, path.data(), path.size(), SQLITE_STATIC); // path
94  sqlite3_bind_text(stmt, 3, feature.data(), feature.size(), SQLITE_STATIC);
95  sqlite3_bind_text(stmt, 4, feature8.data(), feature8.size(), SQLITE_STATIC);
96  sqlite3_bind_text(stmt, 5, context.data(), context.size(), SQLITE_STATIC);
97  if (sqlite3_step(stmt) != SQLITE_DONE) {
98  fprintf(stderr,"sqlite3_step failed\n");
99  }
 /* Reset unconditionally so the statement can be stepped again. */
100  sqlite3_reset(stmt);
101 #endif
102 };
103 
104 feature_recorder::besql_stmt::besql_stmt(BEAPI_SQLITE3 *db3,const char *sql):Mstmt(),stmt()
105 {
106 #ifdef USE_SQLITE3
107  assert(db3!=0);
108  assert(sql!=0);
109  sqlite3_prepare_v2(db3,sql, strlen(sql), &stmt, NULL);
110  assert(stmt!=0);
111 #endif
112 }
113 
115 {
 /* Releases the prepared statement held in `stmt` and clears the pointer.
  * Asserts that a statement is actually present. No-op without USE_SQLITE3. */
116 #ifdef USE_SQLITE3
117  assert(stmt!=0);
118  sqlite3_finalize(stmt);
119  stmt = 0;
120 #endif
121 }
122 
123 void feature_recorder_set::db_send_sql(BEAPI_SQLITE3 *db,const char **stmts, ...)
124 {
125 #ifdef USE_SQLITE3
126  assert(db!=0);
127  for(int i=0;stmts[i];i++){
128  char *errmsg = 0;
129  char buf[65536];
130 
131  va_list ap;
132  va_start(ap,stmts);
133  vsnprintf(buf,sizeof(buf),stmts[i],ap);
134  va_end(ap);
135  if(debug) std::cerr << "SQL: " << buf << "\n";
136  // Don't error on a PRAGMA
137  if((sqlite3_exec(db,buf,NULL,NULL,&errmsg) != SQLITE_OK) && (strncmp(buf,"PRAGMA",6)!=0)) {
138  fprintf(stderr,"Error executing '%s' : %s\n",buf,errmsg);
139  exit(1);
140  }
141  }
142 #endif
143 }
144 
146 {
 /* Creates the feature table, its indexes and its be_features entry for
  * `name` by sending schema_tbl to db3. The name is passed twice because
  * the schema_tbl templates contain up to two %s conversions.
  * An empty name is rejected by assert. No-op without USE_SQLITE3. */
147 #ifdef USE_SQLITE3
148  assert(name.size()>0);
149  db_send_sql(db3,schema_tbl,name.c_str(),name.c_str());
150 #endif
151 }
152 
154 {
 /*
  * Opens the database file <outdir>/<name>.sqlite, creating it when it does
  * not exist, and returns the handle.
  * The file is opened read-write with SQLITE_OPEN_FULLMUTEX.
  * On failure the SQLite error text is printed and the process exits with
  * status 1. When built without USE_SQLITE3 the function returns 0.
  */
155 #ifdef USE_SQLITE3
156  assert(name.size()>0);
157  std::string dbfname = outdir + "/" + name + SQLITE_EXTENSION;
158  if(debug) std::cerr << "create_feature_database " << dbfname << "\n";
159  BEAPI_SQLITE3 *db=0;
160  if (sqlite3_open_v2(dbfname.c_str(), &db,
161  SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_FULLMUTEX,
162  0)!=SQLITE_OK) {
163  std::cerr << "Cannot create database '" << dbfname << "': " << sqlite3_errmsg(db) << "\n";
 /* The handle is closed even though the open failed, before exiting. */
164  sqlite3_close(db);
165  exit(1);
166  }
167  return db;
168 #else
169  return 0;
170 #endif
171 }
172 
173 #pragma GCC diagnostic ignored "-Wmissing-noreturn"
175 {
 /*
  * Opens the "report" database into db3 and installs the base schema
  * (schema_db). Must only be called while db3 is still null.
  * When built without USE_SQLITE3 it prints an explanation and then hits
  * assert(0 && debug).
  */
176 #ifdef USE_SQLITE3
177  assert(db3==0);
178  db3 = db_create_empty("report");
179  db_send_sql(db3,schema_db);
180 #else
181  std::cerr << "*** CANNOT CREATE SQLITE3 DATABASE ***\n";
182  std::cerr << "*** Compiled without libsqlite ***\n";
183  assert(0 && debug); // prevent debug from being not used
184 #endif
185 }
186 
188 {
 /* Closes db3 if it is open and resets it to null, so calling this again
  * is harmless. No-op without USE_SQLITE3.
  * NOTE(review): the return value of sqlite3_close() is not checked. */
189 #ifdef USE_SQLITE3
190  if(db3){
191  if(debug) std::cerr << "db_close()\n";
192  sqlite3_close(db3);
193  db3 = 0;
194  }
195 #endif
196 }
197 
199 {
 /* Starts a transaction on db3 unless one is already marked as open.
  * The in_transaction flag is read and updated while Min_transaction is
  * locked; a second call while a transaction is open does nothing. */
200  cppmutex::lock lock(Min_transaction);
201  if(!in_transaction){
202  db_send_sql(db3,begin_transaction);
203  in_transaction = true;
204  }
205 }
206 
208 {
 /* Commits the transaction on db3 if one is marked as open and clears the
  * in_transaction flag; otherwise only prints a notice on stderr.
  * The flag is read and updated while Min_transaction is locked. */
209  cppmutex::lock lock(Min_transaction);
210  if(in_transaction){
211  db_send_sql(db3,commit_transaction);
212  in_transaction = false;
213  } else {
214  std::cerr << "No transaction to commit\n";
215  }
216 }
217 
218 /* Hook for writing feature to SQLite3 database */
/*
 * Writes one feature through the prepared insert statement `bs`.
 * pos0    - position of the feature, forwarded to insert_feature()
 * feature - the feature text, forwarded unchanged as the first text value
 * context - surrounding context of the feature
 * `bs` must already be set (checked by assert).
 */
219 void feature_recorder::db_write0(const pos0_t &pos0,const std::string &feature,const std::string &context)
220 {
221  /**
222  * Note: this is not very efficient, passing through a quoted feature and then unquoting it.
223  * We could make this more efficient.
224  */
226  assert(bs!=0);
 /* feature8 is a heap-allocated string pointer that may be null; when it is
  * null the original feature is used in its place. (Presumably it holds the
  * unquoted, UTF-8 converted feature - confirm against where it is set.) */
227  bs->insert_feature(pos0,feature,
228  feature8 ? *feature8 : feature,
 /* This function owns feature8 and frees it after the insert. */
230  if (feature8) delete feature8;
231 }
232 
233 /* Hook for writing histogram
234  */
235 #ifdef USE_SQLITE3
/*
 * Row callback for sqlite3_exec() that only counts rows.
 * param     - address of the int counter to bump; one increment per call
 * argc/argv - column count and values of the row (ignored)
 * azColName - column names of the row (ignored)
 * Always returns 0.
 */
static int callback_counter(void *param, int argc, char **argv, char **azColName)
{
    (void)argc;
    (void)argv;
    (void)azColName;
    int &rows_seen = *static_cast<int *>(param);
    rows_seen += 1;
    return 0;
}
242 
243 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
244 static void behist(sqlite3_context *ctx,int argc,sqlite3_value**argv)
245 {
246  const histogram_def *def = reinterpret_cast<const histogram_def *>(sqlite3_user_data(ctx));
247  if(debug) std::cerr << "behist feature=" << def->feature << " suffix="
248  << def->suffix << " argc=" << argc << "value = " << sqlite3_value_text(argv[0]) << "\n";
249  std::string new_feature(reinterpret_cast<const char *>(sqlite3_value_text(argv[0])));
250  if (def->reg.search(new_feature,&new_feature,0,0)) {
251  sqlite3_result_text(ctx,new_feature.c_str(),new_feature.size(),SQLITE_TRANSIENT);
252  }
253 }
254 #endif
255 #endif
256 
258 {
 /*
  * Builds the SQL histogram tables for `def` inside fs.db3.
  *  1. If no table h_<def.feature> exists yet, create it and fill it with
  *     per-value counts taken from f_<def.feature>.
  *  2. If def has a non-empty pattern (and sqlite3_create_function_v2 is
  *     available), create h_<feature>_<suffix> by regrouping the base
  *     histogram through the BEHIST() SQL function.
  * Nothing in this body reads the histogram back; it only creates tables.
  * No-op without USE_SQLITE3.
  */
259 #ifdef USE_SQLITE3
260  /* First check to see if there exists a feature histogram summary. If not, make it */
261  std::string query = "SELECT name FROM sqlite_master WHERE type='table' AND name='h_" + def.feature +"'";
262  char *errmsg=0;
263  int rowcount=0;
 /* callback_counter bumps rowcount once per matching row. */
264  if (sqlite3_exec(fs.db3,query.c_str(),callback_counter,&rowcount,&errmsg)){
 /* NOTE(review): errmsg is printed but never released with sqlite3_free(). */
265  std::cerr << "sqlite3: " << errmsg << "\n";
266  return;
267  }
268  if (rowcount==0){
269  const char *feature = def.feature.c_str();
270  fs.db_send_sql(fs.db3,schema_hist, feature, feature); // creates the empty histogram table and its indexes
271  fs.db_send_sql(fs.db3,schema_hist1, feature, feature); // fills it with per-value counts from f_<feature>
272  }
273 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
274  /* Now create the summarized histogram for the regex, if it is not existing, but only if we have
275  * sqlite3_create_function_v2
276  */
277  if (def.pattern.size()>0){
278  /* Create the database where we will add the histogram */
279  std::string hname = def.feature + "_" + def.suffix;
280 
281  /* Remove any "-" characters if present */
282  for(size_t i=0;i<hname.size();i++){
283  if (hname[i]=='-') hname[i]='_';
284  }
285 
286  if(debug) std::cerr << "CREATING TABLE = " << hname << "\n";
 /* Register BEHIST with &def as user data; behist() reads it back through
  * sqlite3_user_data(), so def must outlive the statements below. */
287  if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
288  (void *)&def,behist,0,0,0)) {
289  std::cerr << "could not register function BEHIST\n";
290  return;
291  }
292  const char *fn = def.feature.c_str();
293  const char *hn = hname.c_str();
 /* NOTE(review): unlike step 1 there is no existence check here, and
  * schema_hist uses a plain CREATE TABLE; db_send_sql() exits on a failed
  * statement, so an already existing h_<hname> would end the process. */
294  fs.db_send_sql(fs.db3,schema_hist, hn , hn); // create the table
295  fs.db_send_sql(fs.db3,schema_hist2, hn , fn); // select into it from a function of the old histogram table
296 
297  /* erase the user defined function */
298  if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
299  (void *)&def,0,0,0,0)) {
300  std::cerr << "could not remove function BEHIST\n";
301  return;
302  }
303  }
304 #endif
305 #endif
306 }
307 
308 #ifdef STAND
309 static std::string hash_name("md5");
310 static std::string hash_func(const uint8_t *buf,size_t bufsize)
311 {
312  if(hash_name=="md5" || hash_name=="MD5"){
313  return md5_generator::hash_buf(buf,bufsize).hexdigest();
314  }
315  if(hash_name=="sha1" || hash_name=="SHA1" || hash_name=="sha-1" || hash_name=="SHA-1"){
316  return sha1_generator::hash_buf(buf,bufsize).hexdigest();
317  }
318  if(hash_name=="sha256" || hash_name=="SHA256" || hash_name=="sha-256" || hash_name=="SHA-256"){
319  return sha256_generator::hash_buf(buf,bufsize).hexdigest();
320  }
321  std::cerr << "Invalid hash name: " << hash_name << "\n";
322  std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n";
323  exit(1);
324 }
325 static feature_recorder_set::hash_def my_hasher(hash_name,hash_func);
326 
 /* Member initialisers for the stand-alone test build: flags and hasher are
  * taken from the constructor arguments, every other member is
  * value-initialised. In particular outdir starts out empty and db3 starts
  * out null, which is what db_create() asserts before opening the database. */
328  flags(flags_),seen_set(),input_fname(),
329  outdir(),
330  frm(),
331  histogram_defs(),
332  db3(),
333  alert_list(),stop_list(),
334  scanner_stats(),hasher(hasher_)
335 {
336 }
337 
338 feature_recorder *feature_recorder_set::create_name_factory(const std::string &name_){return 0;}
339 void feature_recorder_set::create_name(const std::string &name,bool create_stop_also){}
340 bool feature_recorder_set::check_previously_processed(const uint8_t *buf,size_t bufsize){return 0;}
341 feature_recorder *feature_recorder_set::get_name(const std::string &name) const{return 0;}
343 void feature_recorder_set::get_feature_file_list(std::vector<std::string> &ret){}
344 
345 int main(int argc,char **argv)
346 {
347  const char *dbfile = "test.sql3";
348  char *errmsg = 0;
349  sqlite3 *db=0;
350 
351  feature_recorder_set fs(0,my_hasher);
352 
353  unlink(dbfile);
354  fs.db_create();
355  if(1){
356  /* Create an email table */
357  fs.db_create_table("email");
358 
359  /* Lets throw a million features into the table as a test */
360  //sqlite3_exec(db,"BEGIN TRANSACTION",NULL,NULL,&errmsg);
361  beapi_sql_stmt s(db,"email");
362  for(int i=0;i<1000000;i++){
363  pos0_t p;
364  pos0_t p1 = p+i;
365 
366  if(i%10000==0) printf("i=%d\n",i);
367 
368  char feature[64];
369  snprintf(feature,sizeof(feature),"user%d@company.com",i);
370  char context[64];
371  snprintf(context,sizeof(context),"this is the context user%d@company.com yes it is!",i);
372  //insert_statement(stmt,p1,feature,context);
373  }
374  //sqlite3_exec(db,"COMMIT TRANSACTION",NULL,NULL,&errmsg);
375  }
376  fs.db_close();
377 }
378 #endif
379 
int search(const std::string &line, std::string *found, size_t *offset, size_t *len) const
Definition: beregex.cpp:85
void insert_feature(const pos0_t &pos, const std::string &feature, const std::string &feature8, const std::string &context)
besql_stmt(const besql_stmt &)
virtual void db_write0(const pos0_t &pos0, const std::string &feature, const std::string &context)
class besql_stmt * bs
static const char * db_insert_stmt
const std::string name
static const int FLAG_NO_CONTEXT
int() dump_callback_t(void *user, const feature_recorder &fr, const histogram_def &def, const std::string &feature, const uint64_t &count)
static uint32_t debug
static std::string unquote_string(const std::string &feature)
class feature_recorder_set & fs
histogram_defs_t histogram_defs
virtual void dump_histogram_db(const histogram_def &def, void *user, feature_recorder::dump_callback_t cb) const
bool flag_set(uint32_t f) const
static hash__< md, SIZE > hash_buf(const uint8_t *buf, size_t bufsize)
Definition: hash_t.h:264
Definition: sbuf.h:70
#define SQLITE_DETERMINISTIC
static int debug
static const char * begin_transaction[]
#define SQLITE_EXTENSION
static const char * commit_transaction[]
uint64_t imageOffset() const
Definition: sbuf.h:116
std::string str() const
Definition: sbuf.h:79
#define BEAPI_SQLITE3
virtual feature_recorder * get_alert_recorder() const
void db_create_table(const std::string &name)
virtual bool check_previously_processed(const uint8_t *buf, size_t bufsize)
virtual feature_recorder * create_name_factory(const std::string &name_)
virtual void db_send_sql(void *db3, const char **stmts,...)
virtual void create_name(const std::string &name, bool create_stop_also)
feature_recorder_set(const feature_recorder_set &fs)
virtual feature_recorder * get_name(const std::string &name) const
virtual void get_feature_file_list(std::vector< std::string > &ret)
virtual void * db_create_empty(const std::string &name)
static std::string * convert_utf16_to_utf8(const std::string &str)
Definition: histogram.cpp:119
const char * name
Definition: http_parser.c:465
flags
Definition: http_parser.h:216
unsigned int uint32_t
Definition: core.h:40
const std::string feature
const std::string pattern
const std::string suffix
const beregex reg
int main(int argc, char *argv[])
Definition: tcpflow.cpp:565
unsigned char uint8_t
Definition: util.h:6