"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/beregex.cpp" (19 Feb 2021, 5347 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "beregex.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.4.5_vs_1.5.0.

    1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 
    3 #include "config.h"
    4 #include "beregex.h"
    5 
    6 #include <sys/types.h>
    7 #include <inttypes.h>
    8 #include <stdlib.h>
    9 #include <unistd.h>
   10 
   11 
   12 #if defined(HAVE_LIBTRE) && defined(HAVE_TRE_REGCOMP) && defined(HAVE_TRE_TRE_H)
   13 #define REGCOMP tre_regcomp
   14 #define REGFREE tre_regfree
   15 #define REGEXEC tre_regexec
   16 #define nreg (regex_t *)nreg_
   17 #define HAVE_REGULAR_EXPRESSIONS
   18 static const char *regex_version = "tre";
   19 #endif
   20 
   21 /* use regcomp() if tre_regcomp() is not available */
   22 #if defined(HAVE_REGCOMP) && !defined(HAVE_REGULAR_EXPRESSIONS)
   23 #define REGCOMP regcomp
   24 #define REGFREE regfree
   25 #define REGEXEC regexec
   26 #define nreg (regex_t *)nreg_
   27 #define HAVE_REGULAR_EXPRESSIONS
   28 static const char *regex_version = "system";
   29 #endif
   30 
   31 #ifndef HAVE_REGULAR_EXPRESSIONS
   32 #error bulk_extractor requires tre_regcomp or regcomp to run
   33 #error download tre from "http://laurikari.net/tre/download/"
   34 #endif
   35 
   36 const char *beregex::version(){return regex_version;}
   37 
   38 /* Only certain characters are assumed to be a regular expression. These characters are
   39  * coincidently never in email addresses.
   40  */
   41 bool beregex::is_regex(const std::string &str)
   42 {
   43     for(std::string::const_iterator it = str.begin();it!=str.end();it++){
   44         switch(*it){
   45         case '*':
   46         case '[':
   47         case '(':
   48             return true;
   49         }
   50     }
   51     return false;
   52 }
   53 
   54 beregex::beregex(const beregex &that):pat(that.pat),flags(that.flags),nreg_(0)
   55 {
   56     compile();
   57 }
   58 
   59 beregex::beregex(std::string pat_,int flags_):pat(pat_),flags(flags_),nreg_(0)
   60 {
   61     compile();
   62 }
   63 
   64 void beregex::compile()                 // compile the regex
   65 {
   66     if(pat.size()==0) return;
   67     nreg_ = calloc(sizeof(regex_t),1);
   68     if(REGCOMP(nreg,pat.c_str(),flags | REG_EXTENDED)!=0){
   69         std::cerr << "regular expression compile error '" << pat << "' flags=" << flags << "\n";
   70         exit(1);
   71     }
   72 }
   73 beregex::~beregex(){
   74     if(nreg_){
   75         REGFREE(nreg);
   76         free(nreg_);
   77         nreg_ = 0;
   78     }
   79 }
   80 /**
   81  * perform a search for a single hit. If there is a group and something is found,
   82  * set *found to be what was found, *offset to be the starting offset, and *len to be
   83  * the length. Note that this only handles a single group.
   84  */
   85 int beregex::search(const std::string &line,std::string *found,size_t *offset,size_t *len) const
   86 {
   87     static const int REGMAX=2;
   88     regmatch_t pmatch[REGMAX];
   89     if(!nreg_) return 0;
   90     memset(pmatch,0,sizeof(pmatch));
   91     int r = REGEXEC(nreg,line.c_str(),REGMAX,pmatch,0);
   92     if(r==REG_NOMATCH) return 0;
   93     if(r!=0) return 0;                  /* some kind of failure */
   94                                         /* Make copies of the first group */
   95     if(pmatch[1].rm_so != pmatch[1].rm_eo){
   96         if(found)  *found  = line.substr(pmatch[1].rm_so,pmatch[1].rm_eo-pmatch[1].rm_so);
   97         if(offset) *offset = pmatch[1].rm_so;
   98         if(len)    *len    = pmatch[1].rm_eo-pmatch[1].rm_so;
   99     }
  100     return 1;                           /* success */
  101 }
  102 /** Perform a search with an array of strings. Return 0 if success, return code if fail.*/
  103 
  104 int beregex::search(const std::string &line,std::string *matches,int REGMAX) const {
  105     if(!nreg) return 0;
  106     regmatch_t *pmatch = (regmatch_t *)calloc(sizeof(regmatch_t),REGMAX+1);
  107     int r = REGEXEC(nreg,line.c_str(),REGMAX+1,pmatch,0);
  108     if(r==0){
  109         for(int i=0;i<REGMAX;i++){
  110             size_t start = pmatch[i+1].rm_so;
  111             size_t len   = pmatch[i+1].rm_eo-pmatch[i+1].rm_so;
  112             matches[i]   = line.substr(start,len);
  113         }
  114     }
  115     free(pmatch);
  116     return r;
  117 }
  118 
  119 std::string beregex::search(const std::string &line) const
  120 {
  121     if(!nreg) return std::string();
  122     regmatch_t pmatch[2];
  123     memset(pmatch,0,sizeof(pmatch));
  124     if(REGEXEC(nreg,line.c_str(),2,pmatch,0)==0){
  125         size_t start = pmatch[1].rm_so;
  126         size_t len   = pmatch[1].rm_eo-pmatch[1].rm_so;
  127         return line.substr(start,len);
  128     }
  129     else {
  130         return std::string();
  131     }
  132 }
  133 
  134 int regex_list::readfile(std::string fname)
  135 {
  136     std::ifstream f(fname.c_str());
  137     if(f.is_open()){
  138         while(!f.eof()){
  139             std::string line;
  140             getline(f,line);
  141             if(line.size()>0 && (*line.end())=='\r'){
  142                 line.erase(line.end()); /* remove the last character while it is a \n or \r */
  143             }
  144             patterns.push_back(new beregex(line,0));
  145         }
  146         f.close();
  147         return 0;
  148     }
  149     return -1;
  150 }
  151 
  152 void regex_list::add_regex(const std::string &pat)
  153 {
  154     patterns.push_back(new beregex(pat,0));
  155 }
  156 
  157 
  158 /* Find the FIRST match in buf */
  159 bool regex_list::check(const std::string &buf,std::string *found, size_t *offset,size_t *len) const 
  160 {
  161     /* Now check check pattern */
  162     /* First check literals, because they are faster */
  163     bool first = true;
  164     bool fnd = false;
  165     for(std::vector<beregex *>::const_iterator it=patterns.begin(); it != patterns.end(); it++){
  166         std::string nfound;
  167         size_t      noffset=0;
  168         size_t      nlen=0;
  169         if((*it)->search(buf,&nfound,&noffset,&nlen)){
  170             if(first || noffset<*offset){
  171                 fnd     = true;
  172                 *found  = nfound;
  173                 *offset = noffset;
  174                 *len    = nlen;
  175                 first   = false;
  176             }
  177         }
  178     }
  179     return fnd;
  180 }
  181