"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/histogram.h" (19 Feb 2021, 4250 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "histogram.h", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.4.4_vs_1.4.5.

    1 #ifndef HISTOGRAM_H
    2 #define HISTOGRAM_H
    3 
    4 /**
    5  * \addtogroup internal_interfaces
    6  * @{
    7  */
    8 
    9 /* C++ Histogram classes.
   10  *
   11  * Eventually this may become a single class
   12  */
   13 
#include <stddef.h>
#include <stdint.h>

#include <iosfwd>
#include <map>
#include <string>
#include <vector>
   16 
   17 /**
   18  * \class CharClass
   19  * Examine a block of text and count the number of characters
   20  * in various ranges. This is useful for determining if a block of
   21  * bytes is coded in BASE16, BASE64, etc.
   22  */
   23 
   24 class CharClass {
   25 public:
   26     uint32_t range_0_9;         // a range_0_9 character
   27     uint32_t range_A_Fi;        // a-f or A-F
   28     uint32_t range_g_z;         // g-z
   29     uint32_t range_G_Z;         // G-Z
   30     CharClass():range_0_9(0),range_A_Fi(0),range_g_z(0),range_G_Z(0){
   31     }
   32     void add(uint8_t ch){
   33     if(ch>='a' && ch<='f') range_A_Fi++;
   34     if(ch>='A' && ch<='F') range_A_Fi++;
   35     if(ch>='g' && ch<='z') range_g_z++;
   36     if(ch>='G' && ch<='Z') range_G_Z++;
   37     if(ch>='0' && ch<='9') range_0_9++;
   38     }
   39     void add(uint8_t *buf,size_t len){
   40     for(size_t i=0;i<len;i++){
   41         add(buf[i]);
   42     }
   43     }
   44 };
   45 
   46 
   47 /**
   48  * \file histogram.h
   49  * Unicode histogram
   50  *
   51  * The basis of a string-based correlator and many other features.
   52  * Uses C++ STL for sorting and string handling.
   53  * 
   54  * Summer 2011: Now is UTF-8/UTF-16 aware. All strings are stored as UTF-8.
   55  * Detects UTF-16 in an add and automatically converts to UTF-8.
   56  * Keeps track of UTF-16 count separately from UTF-8 count.
   57  *
   58  * Oct 2011: Apparently you are not supposed to subclass the STL container classes. 
   59  */
   60         
   61 
   62 class HistogramMaker  {
   63 public:
   64     static const int FLAG_LOWERCASE= 0x01;
   65     static const int FLAG_NUMERIC  = 0x02;                    // digits only
   66     static uint32_t debug_histogram_malloc_fail_frequency;    // for debugging, make malloc fail sometimes
   67 
   68     /** The ReportElement is used for creating the report of histogram frequencies.
   69      * It can be thought of as the histogram bin.
   70      */
   71     class histogramTally {
   72     public:
   73     uint32_t count;     // total strings seen
   74     uint32_t count16;   // total utf16 strings seen
   75     histogramTally():count(0),count16(0){};
   76     virtual ~histogramTally(){};
   77     };
   78 
   79     /** The ReportElement is used for creating the report of histogram frequencies.
   80      * It can be thought of as the histogram bin.
   81      */
   82     struct ReportElement {
   83     ReportElement(std::string aValue,histogramTally aTally):value(aValue),tally(aTally){ }
   84     const std::string   value;      // UTF-8
   85     histogramTally      tally;
   86     static bool compare_ref(const ReportElement &e1,const ReportElement &e2) {
   87         if (e1.tally.count > e2.tally.count) return true;
   88         if (e1.tally.count < e2.tally.count) return false;
   89         return e1.value < e2.value;
   90     }
   91     static bool compare(const ReportElement *e1,const ReportElement *e2) {
   92         if (e1->tally.count > e2->tally.count) return true;
   93         if (e1->tally.count < e2->tally.count) return false;
   94         return e1->value < e2->value;
   95     }
   96     virtual ~ReportElement(){};
   97     };
   98 
   99 private:
  100     /** A HistogramMap holds the histogram while it is being computed.
  101      */
  102     typedef std::map<std::string,histogramTally> HistogramMap;
  103     HistogramMap h;         // holds the histogram
  104     uint32_t     flags;         // see above
  105 public:
  106 
  107     /**
  108      * Determine if a string probably has utf16.
  109      */
  110     static bool looks_like_utf16(const std::string &str,bool &little_endian); 
  111 
  112     /* These all allocate a string that must be freed */
  113 
  114     static std::string *convert_utf16_to_utf8(const std::string &str);
  115     static std::string *convert_utf16_to_utf8(const std::string &str,bool little_endian);
  116     static std::string *make_utf8(const std::string &key);
  117 
  118     HistogramMaker(uint32_t flags_):h(),flags(flags_){}
  119     void clear(){h.clear();}
  120     void add(const std::string &key);   // adds a string to the histogram count
  121 
  122     /** A FrequencyReportVector is a vector of report elements when the report is generated.
  123      */
  124     typedef std::vector<ReportElement *> FrequencyReportVector;
  125     /** makeReport() makes a report and returns a
  126      * FrequencyReportVector.
  127      */
  128     FrequencyReportVector *makeReport() const;  // return a report with all of them
  129     FrequencyReportVector *makeReport(int topN) const; // returns just the topN
  130     virtual ~HistogramMaker(){};
  131 };
  132 
  133 std::ostream & operator <<(std::ostream &os,const HistogramMaker::FrequencyReportVector &rep);
  134 
  135 #endif