"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/feature_recorder_sql.cpp" (19 Feb 2021, 13079 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "feature_recorder_sql.cpp", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.4.5_vs_1.5.0.

    1 /*
    2  * Feature recorder mods for writing features into an SQLite3 database.
    3  */
    4 
    5 /* http://blog.quibb.org/2010/08/fast-bulk-inserts-into-sqlite/ */
    6 
    7 #include "config.h"
    8 
    9 #include <stdio.h>
   10 #include <stdlib.h>
   11 #include <string.h>
   12 #include <unistd.h>
   13 #include <sbuf.h>
   14 
   15 #include "bulk_extractor_i.h"
   16 #include "histogram.h"
   17 
   18 /*
   19  * Time results with ubnist1 on R4:
   20  * no SQL - 79 seconds
   21  * no pragmas - 651 seconds
   22  * "PRAGMA synchronous =  OFF", - 146 seconds
   23  * "PRAGMA synchronous =  OFF", "PRAGMA journal_mode=MEMORY", - 79 seconds
   24  *
   25  * Time with domexusers:
   26  * no SQL - 
   27  */
   28 
   29 
   /* SQLite support is compiled in only when both HAVE_LIBSQLITE3 and
    * HAVE_SQLITE3_H are defined. */
   #if defined(HAVE_LIBSQLITE3)
   #  if defined(HAVE_SQLITE3_H)
   #    define USE_SQLITE3
   #  endif
   #endif

   /* Suffix appended to the database name by db_create_empty(). */
   #define SQLITE_EXTENSION ".sqlite"

   /* Fall back to a no-op flag value when SQLITE_DETERMINISTIC is not already defined. */
   #if !defined(SQLITE_DETERMINISTIC)
   #  define SQLITE_DETERMINISTIC 0
   #endif

   /* When non-zero, SQL statements and progress messages are echoed to std::cerr. */
   static int debug = 0;
   40 
   41 #ifdef USE_SQLITE3
   /* Statements that db_create() sends to the database it has just opened:
    * performance pragmas first, then the bookkeeping tables.
    * The list ends with a null pointer, which is where db_send_sql() stops. */
   static const char *schema_db[] = {
       /* performance tuning (see the timing notes earlier in this file) */
       "PRAGMA synchronous =  OFF",
       "PRAGMA journal_mode=MEMORY",
       //"PRAGMA temp_store=MEMORY",  // did not improve performance
       "PRAGMA cache_size = 200000",

       /* schema and program version bookkeeping */
       "CREATE TABLE IF NOT EXISTS db_info (schema_ver INTEGER, bulk_extractor_ver INTEGER)",
       "INSERT INTO  db_info (schema_ver, bulk_extractor_ver) VALUES (1,1)",

       /* one row per feature table, plus a name/value configuration table */
       "CREATE TABLE IF NOT EXISTS be_features (tablename VARCHAR,comment TEXT)",
       "CREATE TABLE IF NOT EXISTS be_config (name VARCHAR,value VARCHAR)",

       0   /* end of list */
   };
   52 
   /* Per-feature schema: create the feature table and its indexes, then record
    * the new table in be_features.  Every %s receives the feature name;
    * db_create_table() passes the name twice, which covers the templates that
    * mention it once as well as those that mention it twice. */
   static const char *schema_tbl[] = {
       /* the feature table itself */
       "CREATE TABLE IF NOT EXISTS f_%s (offset INTEGER(12), path VARCHAR, feature_eutf8 TEXT, feature_utf8 TEXT, context_eutf8 TEXT)",

       /* lookup indexes on offset and on both feature encodings */
       "CREATE INDEX IF NOT EXISTS f_%s_idx1 ON f_%s(offset)",
       "CREATE INDEX IF NOT EXISTS f_%s_idx2 ON f_%s(feature_eutf8)",
       "CREATE INDEX IF NOT EXISTS f_%s_idx3 ON f_%s(feature_utf8)",

       /* note the table in the catalog */
       "INSERT INTO be_features (tablename,comment) VALUES ('f_%s','')",

       0   /* end of list */
   };
   61 
   /* Base histogram schema: table h_<name> with indexes on both columns.
    * There is deliberately no IF NOT EXISTS here, so this SQL fails when the
    * histogram table already exists. */
   static const char *schema_hist[] = {
       "CREATE TABLE h_%s (count INTEGER(12), feature_utf8 TEXT)",   /* the histogram table */
       "CREATE INDEX h_%s_idx1 ON h_%s(count)",                      /* index by count */
       "CREATE INDEX h_%s_idx2 ON h_%s(feature_utf8)",               /* index by feature */
       0                                                             /* end of list */
   };
   68 
   /* The histogram operation itself: fill h_<name> with one row per distinct
    * feature_utf8 value found in f_<name>, together with its occurrence count. */
   static const char *schema_hist1[] = {
       "INSERT INTO h_%s select COUNT(*),feature_utf8 from f_%s GROUP BY feature_utf8",
       0   /* end of list */
   };
   73 
   74 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
   /* Derived histogram: re-aggregate an existing histogram table through the
    * BEHIST() SQL function, dropping rows for which BEHIST() yields ''.
    * First %s is the destination table suffix, second %s the source one. */
   static const char *schema_hist2[] = {
       "INSERT INTO h_%s select sum(count),BEHIST(feature_utf8) from h_%s where BEHIST(feature_utf8)!='' GROUP BY BEHIST(feature_utf8)",
       0   /* end of list */
   };
   78 #endif
   79 
   80 #endif
   81 const char *feature_recorder::db_insert_stmt = "INSERT INTO f_%s (offset,path,feature_eutf8,feature_utf8,context_eutf8) VALUES (?1, ?2, ?3, ?4, ?5)";
   /* Single-statement lists in the null-terminated form db_send_sql() consumes. */
   static const char *begin_transaction[] = {
       "BEGIN TRANSACTION",
       0   /* end of list */
   };
   static const char *commit_transaction[] = {
       "COMMIT TRANSACTION",
       0   /* end of list */
   };
   84 void feature_recorder::besql_stmt::insert_feature(const pos0_t &pos,
   85                                                         const std::string &feature,
   86                                                         const std::string &feature8, const std::string &context)
   87 {
   88 #ifdef USE_SQLITE3
   89     assert(stmt!=0);
   90     cppmutex::lock lock(Mstmt);           // grab a lock
   91     const std::string &path = pos.str();
   92     sqlite3_bind_int64(stmt, 1, pos.imageOffset()); // offset
   93     sqlite3_bind_text(stmt, 2, path.data(), path.size(), SQLITE_STATIC); // path
   94     sqlite3_bind_text(stmt, 3, feature.data(), feature.size(), SQLITE_STATIC);
   95     sqlite3_bind_text(stmt, 4, feature8.data(), feature8.size(), SQLITE_STATIC);
   96     sqlite3_bind_text(stmt, 5, context.data(), context.size(), SQLITE_STATIC);
   97     if (sqlite3_step(stmt) != SQLITE_DONE) {
   98         fprintf(stderr,"sqlite3_step failed\n");
   99     }
  100     sqlite3_reset(stmt);
  101 #endif
  102 };
  103 
  104 feature_recorder::besql_stmt::besql_stmt(BEAPI_SQLITE3 *db3,const char *sql):Mstmt(),stmt()
  105 {
  106 #ifdef USE_SQLITE3
  107     assert(db3!=0);
  108     assert(sql!=0);
  109     sqlite3_prepare_v2(db3,sql, strlen(sql), &stmt, NULL);
  110     assert(stmt!=0);
  111 #endif
  112 }
  113 
  114 feature_recorder::besql_stmt::~besql_stmt()
  115 {
  116 #ifdef USE_SQLITE3
  117     assert(stmt!=0);
  118     sqlite3_finalize(stmt);
  119     stmt = 0;
  120 #endif
  121 }
  122 
  123 void feature_recorder_set::db_send_sql(BEAPI_SQLITE3 *db,const char **stmts, ...)
  124 {
  125 #ifdef USE_SQLITE3
  126     assert(db!=0);
  127     for(int i=0;stmts[i];i++){
  128         char *errmsg = 0;
  129         char buf[65536];
  130 
  131         va_list ap;
  132         va_start(ap,stmts);
  133         vsnprintf(buf,sizeof(buf),stmts[i],ap);
  134         va_end(ap);
  135         if(debug) std::cerr << "SQL: " << buf << "\n";
  136         // Don't error on a PRAGMA
  137         if((sqlite3_exec(db,buf,NULL,NULL,&errmsg) != SQLITE_OK)  && (strncmp(buf,"PRAGMA",6)!=0)) {
  138             fprintf(stderr,"Error executing '%s' : %s\n",buf,errmsg);
  139             exit(1);
  140         }
  141     }
  142 #endif
  143 }
  144 
  145 void feature_recorder_set::db_create_table(const std::string &name)
  146 {
  147 #ifdef USE_SQLITE3
  148     assert(name.size()>0);
  149     db_send_sql(db3,schema_tbl,name.c_str(),name.c_str());
  150 #endif
  151 }
  152 
  153 BEAPI_SQLITE3 *feature_recorder_set::db_create_empty(const std::string &name)
  154 {
  155 #ifdef USE_SQLITE3
  156     assert(name.size()>0);
  157     std::string dbfname  = outdir + "/" + name +  SQLITE_EXTENSION;
  158     if(debug) std::cerr << "create_feature_database " << dbfname << "\n";
  159     BEAPI_SQLITE3 *db=0;
  160     if (sqlite3_open_v2(dbfname.c_str(), &db,
  161                         SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_FULLMUTEX,
  162                         0)!=SQLITE_OK) {
  163         std::cerr << "Cannot create database '" << dbfname << "': " << sqlite3_errmsg(db) << "\n";
  164         sqlite3_close(db);
  165         exit(1);
  166     }
  167     return db;
  168 #else
  169     return 0;
  170 #endif
  171 }
  172 
  173 #pragma GCC diagnostic ignored "-Wmissing-noreturn"
  174 void feature_recorder_set::db_create()
  175 {
  176 #ifdef USE_SQLITE3
  177     assert(db3==0);
  178     db3 = db_create_empty("report");
  179     db_send_sql(db3,schema_db);
  180 #else
  181     std::cerr << "*** CANNOT CREATE SQLITE3 DATABASE ***\n";
  182     std::cerr << "*** Compiled without libsqlite     ***\n";
  183     assert(0 && debug);                 // prevent debug from being not used
  184 #endif
  185 }
  186 
  187 void feature_recorder_set::db_close()
  188 {
  189 #ifdef USE_SQLITE3
  190     if(db3){
  191         if(debug) std::cerr << "db_close()\n";
  192         sqlite3_close(db3);
  193         db3 = 0;
  194     }
  195 #endif
  196 }
  197 
  198 void feature_recorder_set::db_transaction_begin()
  199 {
  200     cppmutex::lock lock(Min_transaction);
  201     if(!in_transaction){
  202         db_send_sql(db3,begin_transaction);
  203         in_transaction = true;
  204     }
  205 }
  206 
  207 void feature_recorder_set::db_transaction_commit()
  208 {
  209     cppmutex::lock lock(Min_transaction);
  210     if(in_transaction){
  211         db_send_sql(db3,commit_transaction);
  212         in_transaction = false;
  213     } else {
  214         std::cerr << "No transaction to commit\n";
  215     }
  216 }
  217 
  218 /* Hook for writing feature to SQLite3 database */
  219 void feature_recorder::db_write0(const pos0_t &pos0,const std::string &feature,const std::string &context)
  220 {
  221     /**
  222      * Note: this is not very efficient, passing through a quoted feature and then unquoting it.
  223      * We could make this more efficient.
  224      */
  225     std::string *feature8 = HistogramMaker::convert_utf16_to_utf8(feature_recorder::unquote_string(feature));
  226     assert(bs!=0);
  227     bs->insert_feature(pos0,feature,
  228                          feature8 ? *feature8 : feature,
  229                          flag_set(feature_recorder::FLAG_NO_CONTEXT) ? "" : context);
  230     if (feature8) delete feature8;
  231 }
  232 
  233 /* Hook for writing histogram
  234  */
  235 #ifdef USE_SQLITE3
  /* sqlite3_exec() row callback that only counts rows: `param` points at an
   * int that is incremented on every call.  The row data is ignored.
   * Always returns 0. */
  static int callback_counter(void *param, int argc, char **argv, char **azColName)
  {
      int &rows_seen = *reinterpret_cast<int *>(param);
      ++rows_seen;
      return 0;
  }
  242 
  243 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
  244 static void behist(sqlite3_context *ctx,int argc,sqlite3_value**argv)
  245 {
  246     const histogram_def *def = reinterpret_cast<const histogram_def *>(sqlite3_user_data(ctx));
  247     if(debug) std::cerr << "behist feature=" << def->feature << "  suffix="
  248                         << def->suffix << "  argc=" << argc << "value = " << sqlite3_value_text(argv[0]) << "\n";
  249     std::string new_feature(reinterpret_cast<const char *>(sqlite3_value_text(argv[0])));
  250     if (def->reg.search(new_feature,&new_feature,0,0)) {
  251         sqlite3_result_text(ctx,new_feature.c_str(),new_feature.size(),SQLITE_TRANSIENT);
  252     }
  253 }
  254 #endif
  255 #endif
  256 
  257 void feature_recorder::dump_histogram_db(const histogram_def &def,void *user,feature_recorder::dump_callback_t cb) const
  258 {
  259 #ifdef USE_SQLITE3
  260     /* First check to see if there exists a feature histogram summary. If not, make it */
  261     std::string query = "SELECT name FROM sqlite_master WHERE type='table' AND name='h_" + def.feature +"'";
  262     char *errmsg=0;
  263     int rowcount=0;
  264     if (sqlite3_exec(fs.db3,query.c_str(),callback_counter,&rowcount,&errmsg)){
  265         std::cerr << "sqlite3: " << errmsg << "\n";
  266         return;
  267     }
  268     if (rowcount==0){
  269         const char *feature = def.feature.c_str();
  270         fs.db_send_sql(fs.db3,schema_hist, feature, feature); // creates the histogram
  271         fs.db_send_sql(fs.db3,schema_hist1, feature, feature); // creates the histogram
  272     }
  273 #ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
  274     /* Now create the summarized histogram for the regex, if it is not existing, but only if we have
  275      * sqlite3_create_function_v2
  276      */
  277     if (def.pattern.size()>0){
  278         /* Create the database where we will add the histogram */
  279         std::string hname = def.feature + "_" + def.suffix;
  280 
  281         /* Remove any "-" characters if present */
  282         for(size_t i=0;i<hname.size();i++){
  283             if (hname[i]=='-') hname[i]='_';
  284         }
  285 
  286         if(debug) std::cerr << "CREATING TABLE = " << hname << "\n";
  287         if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
  288                                        (void *)&def,behist,0,0,0)) {
  289             std::cerr << "could not register function BEHIST\n";
  290             return;
  291         }
  292         const char *fn = def.feature.c_str();
  293         const char *hn = hname.c_str();
  294         fs.db_send_sql(fs.db3,schema_hist, hn , hn); // create the table
  295         fs.db_send_sql(fs.db3,schema_hist2, hn , fn); // select into it from a function of the old histogram table
  296 
  297         /* erase the user defined function */
  298         if (sqlite3_create_function_v2(fs.db3,"BEHIST",1,SQLITE_UTF8|SQLITE_DETERMINISTIC,
  299                                        (void *)&def,0,0,0,0)) {
  300             std::cerr << "could not remove function BEHIST\n";
  301             return;
  302         }
  303     }
  304 #endif
  305 #endif
  306 }
  307 
  308 #ifdef STAND
  309 static std::string hash_name("md5");
  310 static std::string hash_func(const uint8_t *buf,size_t bufsize)
  311 {
  312     if(hash_name=="md5" || hash_name=="MD5"){
  313         return md5_generator::hash_buf(buf,bufsize).hexdigest();
  314     }
  315     if(hash_name=="sha1" || hash_name=="SHA1" || hash_name=="sha-1" || hash_name=="SHA-1"){
  316         return sha1_generator::hash_buf(buf,bufsize).hexdigest();
  317     }
  318     if(hash_name=="sha256" || hash_name=="SHA256" || hash_name=="sha-256" || hash_name=="SHA-256"){
  319         return sha256_generator::hash_buf(buf,bufsize).hexdigest();
  320     }
  321     std::cerr << "Invalid hash name: " << hash_name << "\n";
  322     std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n";
  323     exit(1);
  324 }
  325 static feature_recorder_set::hash_def my_hasher(hash_name,hash_func);
  326 
  327 feature_recorder_set::feature_recorder_set(uint32_t flags_,const feature_recorder_set::hash_def &hasher_):
  328     flags(flags_),seen_set(),input_fname(),
  329     outdir(),
  330     frm(),
  331     histogram_defs(),
  332     db3(),
  333     alert_list(),stop_list(),
  334     scanner_stats(),hasher(hasher_)
  335 {
  336 }
  337 
  338 feature_recorder *feature_recorder_set::create_name_factory(const std::string &name_){return 0;}
  339 void feature_recorder_set::create_name(const std::string &name,bool create_stop_also){}
  340 bool feature_recorder_set::check_previously_processed(const uint8_t *buf,size_t bufsize){return 0;}
  341 feature_recorder *feature_recorder_set::get_name(const std::string &name) const{return 0;}
  342 feature_recorder *feature_recorder_set::get_alert_recorder() const{return 0;}
  343 void feature_recorder_set::get_feature_file_list(std::vector<std::string> &ret){}
  344 
  345 int main(int argc,char **argv)
  346 {
  347     const char *dbfile = "test.sql3";
  348     char *errmsg = 0;
  349     sqlite3 *db=0;
  350 
  351     feature_recorder_set fs(0,my_hasher);
  352 
  353     unlink(dbfile);
  354     fs.db_create();
  355     if(1){
  356         /* Create an email table */
  357         fs.db_create_table("email");
  358         
  359         /* Lets throw a million features into the table as a test */
  360         //sqlite3_exec(db,"BEGIN TRANSACTION",NULL,NULL,&errmsg);
  361         beapi_sql_stmt s(db,"email");
  362         for(int i=0;i<1000000;i++){
  363             pos0_t p;
  364             pos0_t p1 = p+i;
  365             
  366             if(i%10000==0) printf("i=%d\n",i);
  367             
  368             char feature[64];
  369             snprintf(feature,sizeof(feature),"user%d@company.com",i);
  370             char context[64];
  371             snprintf(context,sizeof(context),"this is the context user%d@company.com yes it is!",i);
  372             //insert_statement(stmt,p1,feature,context);
  373         }
  374         //sqlite3_exec(db,"COMMIT TRANSACTION",NULL,NULL,&errmsg);
  375     }
  376     fs.db_close();
  377 }
  378 #endif
  379