tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  (unofficial and still experimental Doxygen-generated source code documentation)  

histogram.h
Go to the documentation of this file.
1 #ifndef HISTOGRAM_H
2 #define HISTOGRAM_H
3 
4 /**
5  * \addtogroup internal_interfaces
6  * @{
7  */
8 
9 /* C++ Histogram classes.
10  *
11  * Eventually this may become a single class
12  */
13 
14 #include <vector>
15 #include <map>
16 
17 /**
18  * \class CharClass
19  * Examine a block of text and count the number of characters
20  * in various ranges. This is useful for determining if a block of
21  * bytes is coded in BASE16, BASE64, etc.
22  */
23 
24 class CharClass {
25 public:
26  uint32_t range_0_9; // number of bytes seen in '0'..'9'
27  uint32_t range_A_Fi; // number of bytes seen in a-f or A-F (both cases share this counter)
31  } // NOTE(review): presumably closes the CharClass() constructor; listing lines 28-30 are not visible here
32  void add(uint8_t ch){ // classify one byte; a byte outside every range below changes no counter
33  if(ch>='a' && ch<='f') range_A_Fi++; // lowercase hex letter
34  if(ch>='A' && ch<='F') range_A_Fi++; // uppercase hex letter, same counter as lowercase
35  if(ch>='g' && ch<='z') range_g_z++; // lowercase letter past the hex range
36  if(ch>='G' && ch<='Z') range_G_Z++; // uppercase letter past the hex range
37  if(ch>='0' && ch<='9') range_0_9++; // decimal digit
38  }
39  void add(uint8_t *buf,size_t len){ // classify each of the len bytes starting at buf
40  for(size_t i=0;i<len;i++){
41  add(buf[i]); // delegate to the single-byte overload
42  }
43  }
44 };
45 
46 
47 /**
48  * \file histogram.h
49  * Unicode histogram
50  *
51  * The basis of a string-based correlator and many other features.
52  * Uses C++ STL for sorting and string handling.
53  *
54  * Summer 2011: Now is UTF-8/UTF-16 aware. All strings are stored as UTF-8.
55  * Detects UTF-16 in an add and automatically converts to UTF-8.
56  * Keeps track of UTF-16 count separately from UTF-8 count.
57  *
58  * Oct 2011: Apparently you are not supposed to subclass the STL container classes.
59  */
60 
61 
63 public:
64  static const int FLAG_LOWERCASE= 0x01;
65  static const int FLAG_NUMERIC = 0x02; // digits only
66  static uint32_t debug_histogram_malloc_fail_frequency; // for debugging, make malloc fail sometimes
67 
68  /** The histogramTally holds the counts kept for one histogram entry:
69  * the total number of strings seen and how many of those were UTF-16.
70  */
72  public:
73  uint32_t count; // total strings seen
74  uint32_t count16; // total utf16 strings seen
76  virtual ~histogramTally(){};
77  };
78 
79  /** The ReportElement is used for creating the report of histogram frequencies.
80  * It can be thought of as the histogram bin.
81  */
82  struct ReportElement { // one report row: a string value paired with its tally
83  ReportElement(std::string aValue,histogramTally aTally):value(aValue),tally(aTally){ } // copies both arguments into the members
84  const std::string value; // the counted string, stored as UTF-8
86  static bool compare_ref(const ReportElement &e1,const ReportElement &e2) { // ordering predicate on references: higher count first
87  if (e1.tally.count > e2.tally.count) return true; // e1 has the larger count, so it sorts first
88  if (e1.tally.count < e2.tally.count) return false; // e2 has the larger count
89  return e1.value < e2.value; // equal counts: break the tie by ascending string value
90  }
91  static bool compare(const ReportElement *e1,const ReportElement *e2) { // same ordering as compare_ref, taking pointers
92  if (e1->tally.count > e2->tally.count) return true; // e1 has the larger count, so it sorts first
93  if (e1->tally.count < e2->tally.count) return false; // e2 has the larger count
94  return e1->value < e2->value; // equal counts: break the tie by ascending string value
95  }
96  virtual ~ReportElement(){}; // virtual destructor with an empty body
97  };
98 
99 private:
100  /** A HistogramMap holds the histogram while it is being computed.
101  */
102  typedef std::map<std::string,histogramTally> HistogramMap;
103  HistogramMap h; // holds the histogram
104  uint32_t flags; // see above
105 public:
106 
107  /**
108  * Determine if a string probably has utf16.
109  */
110  static bool looks_like_utf16(const std::string &str,bool &little_endian);
111 
112  /* Each of these returns a pointer to a newly allocated string that the caller must free. */
113 
114  static std::string *convert_utf16_to_utf8(const std::string &str);
115  static std::string *convert_utf16_to_utf8(const std::string &str,bool little_endian);
116  static std::string *make_utf8(const std::string &key);
117 
118  HistogramMaker(uint32_t flags_):h(),flags(flags_){} // start with an empty histogram map and store the given flags (see the FLAG_ constants)
119  void clear(){h.clear();} // remove every entry from the histogram map; flags are left unchanged
120  void add(const std::string &key); // adds a string to the histogram count
121 
122  /** A FrequencyReportVector is a vector of report elements when the report is generated.
123  */
124  typedef std::vector<ReportElement *> FrequencyReportVector;
125  /** makeReport() makes a report and returns a
126  * FrequencyReportVector.
127  */
128  FrequencyReportVector *makeReport() const; // return a report with all of them
129  FrequencyReportVector *makeReport(int topN) const; // returns just the topN
130  virtual ~HistogramMaker(){};
131 };
132 
133 std::ostream & operator <<(std::ostream &os,const HistogramMaker::FrequencyReportVector &rep);
134 
135 #endif
FrequencyReportVector * makeReport() const
Definition: histogram.cpp:28
ReportElement(std::string aValue, histogramTally aTally)
Definition: histogram.h:83
static bool looks_like_utf16(const std::string &str, bool &little_endian)
Definition: histogram.cpp:57
uint32_t range_0_9
Definition: histogram.h:26
const std::string value
Definition: histogram.h:84
void clear()
Definition: histogram.h:119
static const int FLAG_LOWERCASE
Definition: histogram.h:64
uint32_t range_G_Z
Definition: histogram.h:29
static std::string * make_utf8(const std::string &key)
Definition: histogram.cpp:128
static uint32_t debug_histogram_malloc_fail_frequency
Definition: histogram.h:66
uint32_t range_g_z
Definition: histogram.h:28
void add(uint8_t ch)
Definition: histogram.h:32
static const int FLAG_NUMERIC
Definition: histogram.h:65
CharClass()
Definition: histogram.h:30
void add(const std::string &key)
Definition: histogram.cpp:142
HistogramMap h
Definition: histogram.h:103
static bool compare(const ReportElement *e1, const ReportElement *e2)
Definition: histogram.h:91
static bool compare_ref(const ReportElement &e1, const ReportElement &e2)
Definition: histogram.h:86
HistogramMaker(uint32_t flags_)
Definition: histogram.h:118
std::map< std::string, histogramTally > HistogramMap
Definition: histogram.h:102
uint32_t flags
Definition: histogram.h:104
std::ostream & operator<<(std::ostream &os, const HistogramMaker::FrequencyReportVector &rep)
Definition: histogram.cpp:18
uint32_t range_A_Fi
Definition: histogram.h:27
virtual ~HistogramMaker()
Definition: histogram.h:130
std::vector< ReportElement * > FrequencyReportVector
Definition: histogram.h:124
void add(uint8_t *buf, size_t len)
Definition: histogram.h:39
static std::string * convert_utf16_to_utf8(const std::string &str)
Definition: histogram.cpp:119
flags
Definition: http_parser.h:216
unsigned int uint32_t
Definition: core.h:40
unsigned char uint8_t
Definition: util.h:6