tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

bulk_extractor_i.h
Go to the documentation of this file.
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 
3 /*
4  * By design, this file can be read without reading config.h
5  * #include "config.h" must appear as the first line of your .cpp file.
6  */
7 
8 #ifndef PACKAGE_NAME
9 #error bulk_extractor_i.h included before config.h
10 #endif
11 
12 #ifndef BULK_EXTRACTOR_I_H
13 #define BULK_EXTRACTOR_I_H
14 
15 #define DEBUG_PEDANTIC 0x0001 // check values more rigorously
16 #define DEBUG_PRINT_STEPS 0x0002 // prints as each scanner is started
17 #define DEBUG_SCANNER 0x0004 // dump all feature writes to stderr
18 #define DEBUG_NO_SCANNERS 0x0008 // do not run the scanners
19 #define DEBUG_DUMP_DATA 0x0010 // dump data as it is seen
20 #define DEBUG_DECODING 0x0020 // debug decoders in scanner
21 #define DEBUG_INFO 0x0040 // print extra info
22 #define DEBUG_EXIT_EARLY 1000 // just print the size of the volume and exit
23 #define DEBUG_ALLOCATE_512MiB 1002 // Allocate 512MiB, but don't set any flags
24 
25 /* We need netinet/in.h or windowsx.h */
26 #ifdef HAVE_NETINET_IN_H
27 # include <netinet/in.h>
28 #endif
29 
30 #include <assert.h>
31 
32 #if defined(MINGW) || defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
33 #ifndef WIN32
34 #define WIN32
35 #endif
36 #endif
37 
38 #if defined(WIN32) || defined(__MINGW32__)
39 # include <winsock2.h>
40 # include <windows.h>
41 # include <windowsx.h>
42 #endif
43 
44 /* If __BYTE_ORDER hasn't been defined, assume it's Intel (little-endian) */
45 
46 #if defined(WIN32) || !defined(__BYTE_ORDER)
47 # define __LITTLE_ENDIAN 1234
48 # define __BIG_ENDIAN 4321
49 # define __BYTE_ORDER __LITTLE_ENDIAN
50 #endif
51 
/* Stop the build unless __BYTE_ORDER is one of the two supported values.
 * The previous test required it to equal both at once, which can never
 * be true, so the #error could not fire. */
#if (__BYTE_ORDER != __LITTLE_ENDIAN) && (__BYTE_ORDER != __BIG_ENDIAN)
# error Invalid __BYTE_ORDER
#endif
55 
56 /**
57  * \addtogroup plugin_module
58  * @{
59  */
60 
61 /**
62  * \file
63  * bulk_extractor scanner plug_in architecture.
64  *
65  * Scanners are called with two parameters:
66  * A reference to a scanner_params (SP) object.
67  * A reference to a recursion_control_block (RCB) object.
68  *
69  * On startup, each scanner is called with a special SP and RCB.
70  * The scanners respond by setting fields in the SP and returning.
71  *
72  * When executing, once again each scanner is called with the SP and RCB.
73  * This is the only file that needs to be included for a scanner.
74  *
75  * \li \c phase_startup - scanners are loaded and register the names of the feature files they want.
76  * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs.
77  * \li \c phase_shutdown - scanners are given a chance to shutdown
78  */
79 
80 #ifndef __cplusplus
81 # error bulk_extractor_i.h requires C++
82 #endif
83 
84 #include "sbuf.h"
85 #include "utf8.h"
86 #include "utils.h" // for gmtime_r
87 
88 #include <vector>
89 #include <set>
90 #include <map>
91 
92 #include "feature_recorder.h"
93 #include "feature_recorder_set.h"
94 
95 /* Network includes */
96 
97 /****************************************************************
98  *** pcap.h --- If we don't have it, fake it. ---
99  ***/
100 #ifdef HAVE_NETINET_IF_ETHER_H
101 # include <netinet/if_ether.h>
102 #endif
103 #ifdef HAVE_NETINET_IN_H
104 # include <netinet/in.h>
105 #endif
106 #ifdef HAVE_NET_ETHERNET_H
107 # include <net/ethernet.h> // for freebsd
108 #endif
109 
110 
111 #if defined(HAVE_LIBPCAP)
112 # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
113 # pragma GCC diagnostic ignored "-Wredundant-decls"
114 # endif
115 # if defined(HAVE_PCAP_PCAP_H)
116 # include <pcap/pcap.h>
117 # define GOT_PCAP
118 # endif
119 # if defined(HAVE_PCAP_H) && !defined(GOT_PCAP)
120 # include <pcap.h>
121 # define GOT_PCAP
122 # endif
123 # if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP)
124 # include <wpcap/pcap.h>
125 # define GOT_PCAP
126 # endif
127 # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
128 # pragma GCC diagnostic warning "-Wredundant-decls"
129 # endif
130 #else
131 # include "pcap_fake.h"
132 #endif
133 
134 /**
135  * \class scanner_params
136  * The scanner params class is the primary way that the bulk_extractor framework
137  * communicates with the scanners.
138  * @param sbuf - the buffer to be scanned
139  * @param feature_names - if fs==0, add to feature_names the feature file types that this
140  * scanner records.. The names can have a /c appended to indicate
141  * that the feature files should have context enabled. Do not scan.
142  * @param fs - where the features should be saved. Must be provided if feature_names==0.
143  **/
144 
145 /*****************************************************************
146  *** bulk_extractor has a private implementation of IPv4 and IPv6,
147  *** UDP and TCP.
148  ***
149  *** We did this because we found slightly different versions on
150  *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin.
151  *** TCP/IP isn't changing anytime soon, and when it changes (as it
152  *** did with IPv6), these different systems all implemented it slightly
153  *** differently, and that caused a lot of problems for us.
154  *** So the BE13 API has a single implementation and it's good enough
155  *** for our uses.
156  ***/
157 
158 namespace be13 {
159 
160 #ifndef ETH_ALEN
161 # define ETH_ALEN 6 // ethernet address len
162 #endif
163 
164 #ifndef IPPROTO_TCP
165 # define IPPROTO_TCP 6 /* tcp */
166 #endif
167 
168  struct ether_addr {
170  } __attribute__ ((__packed__));
171 
172  /* 10Mb/s ethernet header */
/* Overlay for the header at the start of an Ethernet frame: destination
 * address, source address, then the 16-bit type field. Declared packed so
 * the compiler inserts no padding between the members.
 * NOTE(review): ether_type is presumably in network byte order (ether_type()
 * passes it through ntohs) -- confirm before reading it directly. */
173  struct ether_header {
174  uint8_t ether_dhost[ETH_ALEN]; /* destination eth addr */
175  uint8_t ether_shost[ETH_ALEN]; /* source ether addr */
176  uint16_t ether_type; /* packet type ID field */
177  } __attribute__ ((__packed__));
178 
179  /* The mess below is because these items are typedefs and
180  * structs on some systems and #defines on other systems
181  * So in the interest of portability we need to define *new*
182  * structures that are only used here
183  */
184 
185  typedef uint32_t ip4_addr_t; // historical
186 
187  // on windows we use the definition that's in winsock
188  struct ip4_addr {
190  };
191 
192  /*
193  * Structure of an internet header, naked of options.
194  */
/* Packed overlay for an IPv4 header without options; no padding is inserted
 * between members. ip_hl and ip_v are two 4-bit fields whose declaration
 * order is selected by __BYTE_ORDER -- presumably so both names refer to the
 * same bits of the first header byte on either kind of host.
 * NOTE(review): multi-byte members are presumably in network byte order as
 * captured -- confirm at each use. */
195  struct ip4 {
196 #if __BYTE_ORDER == __LITTLE_ENDIAN
197  uint8_t ip_hl:4; /* header length */
198  uint8_t ip_v:4; /* version */
199 #endif
200 #if __BYTE_ORDER == __BIG_ENDIAN
201  uint8_t ip_v:4; /* version */
202  uint8_t ip_hl:4; /* header length */
203 #endif
204  uint8_t ip_tos; /* type of service */
205  uint16_t ip_len; /* total length */
206  uint16_t ip_id; /* identification */
207  uint16_t ip_off; /* fragment offset field */
/* Bit masks that follow ip_off: three flag bits plus the 13-bit offset mask
 * (presumably meant to be applied to ip_off after byte-order conversion). */
208 #define IP_RF 0x8000 /* reserved fragment flag */
209 #define IP_DF 0x4000 /* dont fragment flag */
210 #define IP_MF 0x2000 /* more fragments flag */
211 #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
212  uint8_t ip_ttl; /* time to live */
213  uint8_t ip_p; /* protocol */
214  uint16_t ip_sum; /* checksum */
215  struct ip4_addr ip_src, ip_dst; /* source and dest address */
216  } __attribute__ ((__packed__));
217 
218  struct ip4_dgram {
219  const struct ip4 *header;
220  const uint8_t *payload;
222  };
223 
224  /*
225  * IPv6 header structure
226  */
227  struct ip6_addr { // our own private ipv6 definition
228  union {
229  uint8_t addr8[16]; // three ways to get the data
232  } addr; /* 128-bit IP6 address */
233  };
234  struct ip6_hdr {
235  union {
236  struct ip6_hdrctl {
237  uint32_t ip6_un1_flow; /* 20 bits of flow-ID */
238  uint16_t ip6_un1_plen; /* payload length */
239  uint8_t ip6_un1_nxt; /* next header */
240  uint8_t ip6_un1_hlim; /* hop limit */
242  uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */
244  struct ip6_addr ip6_src; /* source address */
245  struct ip6_addr ip6_dst; /* destination address */
246  } __attribute__((__packed__));
247 
248  struct ip6_dgram {
249  const struct ip6_hdr *header;
250  const uint8_t *payload;
252  };
253 
254  /*
255  * TCP header.
256  * Per RFC 793, September, 1981.
257  */
258  typedef uint32_t tcp_seq;
259  struct tcphdr {
260  uint16_t th_sport; /* source port */
261  uint16_t th_dport; /* destination port */
262  tcp_seq th_seq; /* sequence number */
263  tcp_seq th_ack; /* acknowledgement number */
264 # if __BYTE_ORDER == __LITTLE_ENDIAN
265  uint8_t th_x2:4; /* (unused) */
266  uint8_t th_off:4; /* data offset */
267 # endif
268 # if __BYTE_ORDER == __BIG_ENDIAN
269  uint8_t th_off:4; /* data offset */
270  uint8_t th_x2:4; /* (unused) */
271 # endif
273 # define TH_FIN 0x01
274 # define TH_SYN 0x02
275 # define TH_RST 0x04
276 # define TH_PUSH 0x08
277 # define TH_ACK 0x10
278 # define TH_URG 0x20
279  uint16_t th_win; /* window */
280  uint16_t th_sum; /* checksum */
281  uint16_t th_urp; /* urgent pointer */
282 };
283 /*
284  * The packet_info structure records packets after they are read from the pcap library.
285  * It preserves the original pcap information and information decoded from the MAC and
286  * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11
287  * interfaces. However it does not preserve the full radiotap information.
288  *
289  * packet_info is created to make it easier to write network forensic software. It encapsulates
290  * much of the common knowledge needed to operate on packet-based IP networks.
291  *
292  * @param ts - the actual packet time to use (adjusted)
293  * @param pcap_data - Original data offset point from pcap
294  * @param data - the actual packet data, minus the MAC layer
295  * @param datalen - How much data is available at the data pointer
296  *
297  */
298 class packet_info {
299 public:
300  // IPv4 header offsets
301  static const size_t ip4_proto_off = 9;
302  static const size_t ip4_src_off = 12;
303  static const size_t ip4_dst_off = 16;
304  // IPv6 header offsets
305  static const size_t ip6_nxt_hdr_off = 6;
306  static const size_t ip6_plen_off = 4;
307  static const size_t ip6_src_off = 8;
308  static const size_t ip6_dst_off = 24;
309  // TCP header offsets
310  static const size_t tcp_sport_off = 0;
311  static const size_t tcp_dport_off = 2;
312 
313  class frame_too_short : public std::logic_error {
314  public:
316  std::logic_error("frame too short to contain requisite network structures") {}
317  };
318 
319  enum vlan_t {NO_VLAN=-1};
320  /** create a packet, usually an IP packet.
321  * @param d - start of MAC packet
322  * @param d2 - start of IP data
323  */
324  packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d,
325  const struct timeval &ts_,const uint8_t *d2,size_t dl2):
326  pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){}
327  packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d):
328  pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){}
329 
330  const int pcap_dlt; // data link type; needed by libpcap, not provided
331  const struct pcap_pkthdr *pcap_hdr; // provided by libpcap
332  const u_char *pcap_data; // provided by libpcap; where the MAC layer begins
333  const struct timeval &ts; // when packet received; possibly modified before packet_info created
334  const uint8_t *const ip_data; // pointer to where ip data begins
335  const size_t ip_datalen; // length of ip data
336 
337  static u_short nshort(const u_char *buf,size_t pos); // return a network byte order short at offset pos
338  int ip_version() const; // returns 4, 6 or 0
339  u_short ether_type() const; // returns 0 if not IEEE802, otherwise returns ether_type
340  int vlan() const; // returns NO_VLAN if not IEEE802 or not VLAN, otherwise VID
341  const uint8_t *get_ether_dhost() const; // returns a pointer to ether dhost if ether packet
342  const uint8_t *get_ether_shost() const; // returns a pointer to ether shost if ether packet
343 
344  // packet typing
345  bool is_ip4() const;
346  bool is_ip6() const;
347  bool is_ip4_tcp() const;
348  bool is_ip6_tcp() const;
349  // packet extraction
350  // IPv4 - return pointers to fields or throws frame_too_short exception
351  const struct in_addr *get_ip4_src() const;
352  const struct in_addr *get_ip4_dst() const;
353  uint8_t get_ip4_proto() const;
354  // IPv6
355  uint8_t get_ip6_nxt_hdr() const;
356  uint16_t get_ip6_plen() const;
357  const struct ip6_addr *get_ip6_src() const;
358  const struct ip6_addr *get_ip6_dst() const;
359  // TCP
360  uint16_t get_ip4_tcp_sport() const;
361  uint16_t get_ip4_tcp_dport() const;
362  uint16_t get_ip6_tcp_sport() const;
363  uint16_t get_ip6_tcp_dport() const;
364 };
365 
366 #ifdef DLT_IEEE802
367  inline u_short packet_info::ether_type() const
368  {
370  const struct ether_header *eth_header = (struct ether_header *) pcap_data;
371  return ntohs(eth_header->ether_type);
372  }
373  return 0;
374  }
375 #endif
376 
377 #ifndef ETHERTYPE_PUP
378 #define ETHERTYPE_PUP 0x0200 /* Xerox PUP */
379 #endif
380 
381 #ifndef ETHERTYPE_SPRITE
382 #define ETHERTYPE_SPRITE 0x0500 /* Sprite */
383 #endif
384 
385 #ifndef ETHERTYPE_IP
386 #define ETHERTYPE_IP 0x0800 /* IP */
387 #endif
388 
389 #ifndef ETHERTYPE_ARP
390 #define ETHERTYPE_ARP 0x0806 /* Address resolution */
391 #endif
392 
393 #ifndef ETHERTYPE_REVARP
394 #define ETHERTYPE_REVARP 0x8035 /* Reverse ARP */
395 #endif
396 
397 #ifndef ETHERTYPE_AT
398 #define ETHERTYPE_AT 0x809B /* AppleTalk protocol */
399 #endif
400 
401 #ifndef ETHERTYPE_AARP
402 #define ETHERTYPE_AARP 0x80F3 /* AppleTalk ARP */
403 #endif
404 
405 #ifndef ETHERTYPE_VLAN
406 #define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging */
407 #endif
408 
409 #ifndef ETHERTYPE_IPX
410 #define ETHERTYPE_IPX 0x8137 /* IPX */
411 #endif
412 
413 #ifndef ETHERTYPE_IPV6
414 #define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */
415 #endif
416 
417 #ifndef ETHERTYPE_LOOPBACK
418 #define ETHERTYPE_LOOPBACK 0x9000 /* used to test interfaces */
419 #endif
420 
421 
422  inline u_short packet_info::nshort(const u_char *buf,size_t pos)
423  {
424  return (buf[pos]<<8) | (buf[pos+1]);
425  }
426 
427  inline int packet_info::vlan() const
428  {
429  if(ether_type()==ETHERTYPE_VLAN){
430  return nshort(pcap_data,sizeof(struct ether_header));
431  }
432  return -1;
433  }
434 
435  inline int packet_info::ip_version() const
436  {
437  /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */
438  if (ip_datalen >= sizeof(struct ip4)) {
439  const struct ip4 *ip_header = (struct ip4 *) ip_data;
440  switch(ip_header->ip_v){
441  case 4: return 4;
442  case 6: return 6;
443  }
444  }
445  return 0;
446  }
447 
448  // packet typing
449 
450  inline bool packet_info::is_ip4() const
451  {
452  return ip_version() == 4;
453  }
454 
455  inline bool packet_info::is_ip6() const
456  {
457  return ip_version() == 6;
458  }
459 
460  inline bool packet_info::is_ip4_tcp() const
461  {
462  if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) {
463  return false;
464  }
465  return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP;
466  return false;
467  }
468 
469  inline bool packet_info::is_ip6_tcp() const
470  {
471  if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
472  return false;
473  }
474  return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP;
475  }
476 
477  // packet extraction
478  // precondition: the appropriate packet type function must return true before using these functions.
479  // example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport()
480 
481  // Get ether addresses; should this handle vlan and such?
482  inline const uint8_t *packet_info::get_ether_dhost() const
483  {
484  if(pcap_hdr->caplen < sizeof(struct ether_addr)){
485  throw new frame_too_short();
486  }
487  return ((const struct ether_header *)pcap_data)->ether_dhost;
488  }
489 
490  inline const uint8_t *packet_info::get_ether_shost() const
491  {
492  if(pcap_hdr->caplen < sizeof(struct ether_addr)){
493  throw new frame_too_short();
494  }
495  return ((const struct ether_header *)pcap_data)->ether_shost;
496  }
497 
498  // IPv4
499 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
500 # pragma GCC diagnostic ignored "-Wcast-align"
501 # endif
502  inline const struct in_addr *packet_info::get_ip4_src() const
503  {
504  if(ip_datalen < sizeof(struct ip4)) {
505  throw new frame_too_short();
506  }
507  return (const struct in_addr *) ip_data + ip4_src_off;
508  }
509  inline const struct in_addr *packet_info::get_ip4_dst() const
510  {
511  if(ip_datalen < sizeof(struct ip4)) {
512  throw new frame_too_short();
513  }
514  return (const struct in_addr *) ip_data + ip4_dst_off;
515  }
516 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
517 # pragma GCC diagnostic warning "-Wcast-align"
518 # endif
520  {
521  if(ip_datalen < sizeof(struct ip4)) {
522  throw new frame_too_short();
523  }
524  return *((uint8_t *) (ip_data + ip4_proto_off));
525  }
526  // IPv6
528  {
529  if(ip_datalen < sizeof(struct ip6_hdr)) {
530  throw new frame_too_short();
531  }
532  return *((uint8_t *) (ip_data + ip6_nxt_hdr_off));
533  }
535  {
536  if(ip_datalen < sizeof(struct ip6_hdr)) {
537  throw new frame_too_short();
538  }
539  //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off)));
540  return nshort(ip_data,ip6_plen_off);
541  }
542 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
543 # pragma GCC diagnostic ignored "-Wcast-align"
544 # endif
545  inline const struct ip6_addr *packet_info::get_ip6_src() const
546  {
547  if(ip_datalen < sizeof(struct ip6_hdr)) {
548  throw new frame_too_short();
549  }
550  return (const struct ip6_addr *) ip_data + ip6_src_off;
551  }
552  inline const struct ip6_addr *packet_info::get_ip6_dst() const
553  {
554  if(ip_datalen < sizeof(struct ip6_hdr)) {
555  throw new frame_too_short();
556  }
557  return (const struct ip6_addr *) ip_data + ip6_dst_off;
558  }
559 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
560 # pragma GCC diagnostic warning "-Wcast-align"
561 # endif
562 
563  // TCP
565  {
566  if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
567  throw new frame_too_short();
568  }
569  //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off)));
570  return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off);
571  }
573  {
574  if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
575  throw new frame_too_short();
576  }
577  //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off)));
578  return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); //
579 
580  }
582  {
583  if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
584  throw new frame_too_short();
585  }
586  //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off)));
587  return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); //
588  }
590  {
591  if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
592  throw new frame_too_short();
593  }
594  //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off)));
595  return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); //
596  }
597 };
598 
599 
600 typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb);
601 typedef void process_t(const class scanner_params &sp);
602 typedef void packet_callback_t(void *user,const be13::packet_info &pi);
603 
604 /** scanner_info gets filled in by the scanner to tell the caller about the scanner.
605  *
606  */
608 private:
609  static std::stringstream helpstream; // where scanner info help messages are saved.
610 
611  // default copy construction and assignment are meaningless
612  // and not implemented
615  public:
616  static std::string helpstr(){return helpstream.str();}
617  typedef std::map<std::string,std::string> config_t; // configuration for scanner passed in
618 
619  /* scanner flags */
620  static const int SCANNER_DISABLED = 0x001; // v1: enabled by default
621  static const int SCANNER_NO_USAGE = 0x002; // v1: do not show scanner in usage
622  static const int SCANNER_NO_ALL = 0x004; // v2: do not enable with -eall
623  static const int SCANNER_FIND_SCANNER = 0x008; // v2: this scanner uses the find_list
624  static const int SCANNER_RECURSE = 0x010; // v3: this scanner will recurse
625  static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size
626  static const int SCANNER_WANTS_NGRAMS = 0x040; // v3: Scanner gets buffers that are constant n-grams
627  static const int SCANNER_FAST_FIND = 0x080; // v3: This scanner is a very fast FIND scanner
628  static const int SCANNER_DEPTH_0 = 0x100; // v3: scanner only runs at depth 0 by default
629  static const int CURRENT_SI_VERSION = 4;
630 
631  static const std::string flag_to_string(const int flag){
632  std::string ret;
633  if(flag==0) ret += "NONE ";
634  if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED ";
635  if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE ";
636  if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL ";
637  if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER ";
638  if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE ";
639  if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND ";
640  if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS ";
641  return ret;
642  }
643 
644  /* Global config is passed to each scanner as a pointer when it is loaded.
645  * Scanner histograms are added to 'histograms' by machinery.
646  */
647  struct scanner_config {
649  virtual ~scanner_config(){}
650  config_t namevals; // v3: (input) name=val map
651  int debug; // v3: (input) current debug level
652  };
653 
654  // never change the order or delete old fields, or else you will
655  // break backwards compatibility
659  /* PASSED FROM SCANNER to API: */
660  int si_version; // version number for this structure
661  std::string name; // v1: (output) scanner name
662  std::string author; // v1: (output) who wrote me?
663  std::string description; // v1: (output) what do I do?
664  std::string url; // v1: (output) where I come from
665  std::string scanner_version; // v1: (output) version for the scanner
666  uint64_t flags; // v1: (output) flags
667  std::set<std::string> feature_names; // v1: (output) features I need
668  histogram_defs_t histogram_defs; // v1: (output) histogram definition info
669  void *packet_user; // v2: (output) data for network callback
670  packet_callback_t *packet_cb; // v2: (output) callback for processing network packets, or NULL
671 
672  /* PASSED FROM API TO SCANNER; access with functions below */
673  const scanner_config *config; // v3: (input to scanner) config
674 
675  // These methods are implemented in the plugin system for the scanner to get config information.
676  // The get_config methods should be called on the si object during PHASE_STARTUP
677  virtual void get_config(const scanner_info::config_t &c,
678  const std::string &name,std::string *val,const std::string &help);
679  virtual void get_config(const std::string &name,std::string *val,const std::string &help);
680  virtual void get_config(const std::string &name,uint64_t *val,const std::string &help);
681  virtual void get_config(const std::string &name,int32_t *val,const std::string &help);
682  virtual void get_config(const std::string &name,uint32_t *val,const std::string &help);
683  virtual void get_config(const std::string &name,uint16_t *val,const std::string &help);
684  virtual void get_config(const std::string &name,uint8_t *val,const std::string &help);
685 #ifdef __APPLE__
686  virtual void get_config(const std::string &name,size_t *val,const std::string &help);
687 #define HAVE_GET_CONFIG_SIZE_T
688 #endif
689  virtual void get_config(const std::string &name,bool *val,const std::string &help);
690  virtual ~scanner_info(){};
691 };
692 #include <map>
693 /**
694  * The scanner_params class is a way for sending the scanner parameters
695  * for this particular sbuf to be scanned.
696  */
697 
699  public:
701  static const int CURRENT_SP_VERSION=3;
702 
703  typedef std::map<std::string,std::string> PrintOptions;
705  PrintOptions::const_iterator p = po.find("print_mode_t");
706  if(p != po.end()){
707  if(p->second=="MODE_NONE") return MODE_NONE;
708  if(p->second=="MODE_HEX") return MODE_HEX;
709  if(p->second=="MODE_RAW") return MODE_RAW;
710  if(p->second=="MODE_HTTP") return MODE_HTTP;
711  }
712  return MODE_NONE;
713  }
714  static void setPrintMode(PrintOptions &po,int mode){
715  switch(mode){
716  default:
717  case MODE_NONE:po["print_mode_t"]="MODE_NONE";return;
718  case MODE_HEX:po["print_mode_t"]="MODE_HEX";return;
719  case MODE_RAW:po["print_mode_t"]="MODE_RAW";return;
720  case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return;
721  }
722  }
723 
724  // phase_t specifies when the scanner is being called
725  typedef enum {
727  PHASE_STARTUP = 0, // called in main thread when scanner loads; called on EVERY scanner (called for help)
728  PHASE_INIT = 3, // called in main thread for every ENABLED scanner after all scanners loaded
729  PHASE_THREAD_BEFORE_SCAN = 4, // called in worker thread for every ENABLED scanner before first scan
730  PHASE_SCAN = 1, // called in worker thread for every ENABLED scanner to scan an sbuf
731  PHASE_SHUTDOWN = 2, // called in main thread for every ENABLED scanner when scanner is shutdown
732  } phase_t ;
733  static PrintOptions no_options; // in common.cpp
734 
735  /********************
736  *** CONSTRUCTORS ***
737  ********************/
738 
739  /* A scanner params with all of the instance variables, typically for scanning */
740  scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,
741  PrintOptions &print_options_):
743  phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){
744  }
745 
746  /* A scanner params with no print options */
747  scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_):
749  phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){
750  }
751 
752  /* A scanner params with no print options but an xmlstream */
753  scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd):
755  phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){
756  }
757 
758  /** Construct a scanner_params for recursion from an existing sp and a new sbuf.
759  * Defaults to phase1
760  */
761  scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new):
763  sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1),
764  print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){
765  assert(sp_existing.sp_version==CURRENT_SP_VERSION);
766  };
767 
768  /**
769  * A scanner params with an empty info
770  */
771 
772  /**************************
773  *** INSTANCE VARIABLES ***
774  **************************/
775 
776  const int sp_version; /* version number of this structure */
777  const phase_t phase; /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */
778  const sbuf_t &sbuf; /* v1: what to scan / only valid in SCAN_PHASE */
779  class feature_recorder_set &fs; /* v1: where to put the results / only valid in SCAN_PHASE */
780  const uint32_t depth; /* v1: how far down are we? / only valid in SCAN_PHASE */
781 
782  PrintOptions &print_options; /* v1: how to print / NOT USED IN SCANNERS */
783  scanner_info *info; /* v2: set/get parameters on startup, hasher */
784  std::stringstream *sxml; /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */
785 };
786 
787 
788 inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){
789  os << "scanner_params(" << sp.sbuf << ")";
790  return os;
791 };
792 
794  public:
795 /**
796  * @param callback_ - the function to call back
797  * @param partName_ - the part of the forensic path processed by this scanner.
798  */
799  recursion_control_block(process_t *callback_,std::string partName_):
800  callback(callback_),partName(partName_){}
802  std::string partName; /* eg "ZIP", "GZIP" */
803 };
804 
805 /* plugin.cpp. This will become a class... */
806 class scanner_def {
807 public:;
808  static uint32_t max_depth; // maximum depth to scan for the scanners
809  static uint32_t max_ngram; // maximum ngram size to change
811  scanner_t *scanner; // pointer to the primary entry point
812  bool enabled; // is enabled?
813  scanner_info info; // info block sent to and returned by scanner
814  std::string pathPrefix; /* path prefix for recursive scanners */
815 };
816 
817 namespace be13 {
818  /* plugin.cpp */
819 
820  struct plugin {
821  typedef std::vector<scanner_def *> scanner_vector;
822  static scanner_vector current_scanners; // current scanners
823  static bool dup_data_alerts; // notify when duplicate data is not processed
824  static uint64_t dup_data_encountered; // amount of dup data encountered
825 
826  static void set_scanner_debug(int debug);
827 
828  static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner
829  static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc); // load a scanner from a file
830  static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins
831  static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory
832  static void load_scanner_directories(const std::vector<std::string> &dirnames,const scanner_info::scanner_config &sc);
833  static void load_scanner_packet_handlers();
834 
835  // send every enabled scanner the phase message
837 
838  // returns the named scanner, or 0 if no scanner of that name
839  static scanner_t *find_scanner(const std::string &name);
840  static void get_enabled_scanners(std::vector<std::string> &svector); // put the enabled scanners into the vector
842  static bool find_scanner_enabled(); // return true if a find scanner is enabled
843 
844  // print info about the scanners:
845  static void scanners_disable_all(); // saves a command to disable all
846  static void scanners_enable_all(); // enable all of them
847  static void set_scanner_enabled(const std::string &name,bool enable);
848  static void set_scanner_enabled_all(bool enable);
849  static void scanners_enable(const std::string &name); // saves a command to enable this scanner
850  static void scanners_disable(const std::string &name); // saves a command to disable this scanner
851  static void scanners_process_enable_disable_commands(); // process the enable/disable and config commands
852  static void scanners_init(feature_recorder_set &fs); // init the scanners
853 
854  static void info_scanners(bool detailed_info,
855  bool detailed_settings,
856  scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt);
857 
858 
859  /* Run the phases on the scanners */
860  static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown
861  static uint32_t get_max_depth_seen();
862  static void process_sbuf(const class scanner_params &sp); /* process for feature extraction */
863  static void process_packet(const be13::packet_info &pi);
864 
865  /* recorders */
866  static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names);
867 
868  };
869 };
870 
/* Number-to-string helpers: each formats its argument with the default
 * std::stringstream formatting and returns the resulting text. */
inline std::string itos(int i){
    std::stringstream out;
    out << i;
    return out.str();
}
inline std::string dtos(double d){
    std::stringstream out;
    out << d;
    return out.str();
}
inline std::string utos(unsigned int i){
    std::stringstream out;
    out << i;
    return out.str();
}
inline std::string utos(uint64_t i){
    std::stringstream out;
    out << i;
    return out.str();
}
inline std::string utos(uint16_t i){
    std::stringstream out;
    out << i;
    return out.str();
}
876 inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up
877  std::string utf8_line;
878  try {
879  utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line));
880  } catch(utf8::invalid_utf16 const &){
881  /* Exception thrown: bad UTF16 encoding */
882  utf8_line = "";
883  }
884  return utf8_line;
885 }
886 
887 inline std::wstring safe_utf8to16(std::string s){ // needs to be cleaned up
888  std::wstring utf16_line;
889  try {
890  utf8::utf8to16(s.begin(),s.end(),back_inserter(utf16_line));
891  } catch(utf8::invalid_utf8 const &){
892  /* Exception thrown: bad UTF16 encoding */
893  utf16_line = L"";
894  }
895  return utf16_line;
896 }
897 
// Cut `line` off at the first occurrence of `ch` (the match itself is removed
// too); the string is left untouched when `ch` does not occur.
inline void truncate_at(std::string &line, char ch) {
    const std::string::size_type cut = line.find(ch);
    if(cut == std::string::npos) return;
    line.erase(cut);
}
903 
#ifndef HAVE_ISXDIGIT
/* Fallback used when HAVE_ISXDIGIT is not defined: returns 1 for the ASCII
 * characters 0-9, a-f and A-F, and 0 for everything else. */
inline int isxdigit(int c)
{
    if(c>='0' && c<='9') return 1;
    if(c>='a' && c<='f') return 1;
    if(c>='A' && c<='F') return 1;
    return 0;
}
#endif
910 
911 /* Useful functions for scanners */
912 #define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000
913 #define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL
914 /*
915  * 11644473600 is the number of seconds between the Win32 epoch
916  * and the Unix epoch.
917  *
918  * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992
919  * gmtime_r() is Linux-specific. You'll find a copy in util.cpp for Windows.
920  */
921 
922 inline std::string microsoftDateToISODate(const uint64_t &time)
923 {
925 
926  struct tm time_tm;
927  gmtime_r(&tmp, &time_tm);
928  char buf[256];
929  strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time
930  return std::string(buf);
931 }
932 
/**
 * Convert a Unix timestamp to an ISO 8601 UTC string of the form
 * YYYY-MM-DDTHH:MM:SSZ.
 *
 * @param t value handed to gmtime_r() as a time_t.
 * @return the formatted time; an empty string if strftime() produced no output.
 */
inline std::string unixTimeToISODate(const uint64_t &t)
{
    struct tm time_tm = {};  // zeroed so a failed conversion never formats indeterminate fields
    time_t tmp=t;
    gmtime_r(&tmp, &time_tm);
    char buf[256];
    const size_t len = strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time
    return std::string(buf, len);  // use the reported length: buf is unspecified when strftime returns 0
}
943 
/* Many internal Windows and Linux structures require a valid printable name in ASCII.
 * Returns true only when every byte of `name` is in the range 0x20..0x7e;
 * an empty name is accepted. */
inline bool validASCIIName(const std::string &name)
{
    for(std::string::const_iterator it = name.begin(); it != name.end(); ++it){
        const unsigned char byte = static_cast<unsigned char>(*it);
        // reject control characters (below space), DEL (0x7f) and anything with the high bit set
        if(byte < ' ' || byte >= 0x7f) return false;
    }
    return true;
}
954 
955 #endif
Definition: sbuf.h:221
static int debug
std::set< histogram_def > histogram_defs_t
std::set< std::string > feature_file_names_t
static uint32_t max_ngram
std::map< std::string, std::string > config_t
static bool dup_data_alerts
static void phase_shutdown(feature_recorder_set &fs, std::stringstream *sxml=0)
Definition: plugin.cpp:395
scanner_t * scanner
bool is_ip6_tcp() const
static void load_scanner(scanner_t scanner, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:136
bool validASCIIName(const std::string &name)
std::string name
static bool find_scanner_enabled()
Definition: plugin.cpp:314
static const int SCANNER_NO_USAGE
static const int SCANNER_WANTS_NGRAMS
#define ETHERTYPE_VLAN
uint16_t addr16[8]
static const size_t tcp_dport_off
static const int SCANNER_FAST_FIND
const uint8_t * get_ether_shost() const
std::wstring safe_utf8to16(std::string s)
static const int CURRENT_SP_VERSION
uint16_t ip6_un1_plen
packet_info(const int dlt, const struct pcap_pkthdr *h, const u_char *d)
void scanner_t(const class scanner_params &sp, const class recursion_control_block &rcb)
const struct ip6_addr * get_ip6_src() const
std::string itos(int i)
uint8_t get_ip4_proto() const
static uint64_t dup_data_encountered
const struct in_addr * get_ip4_dst() const
static const int SCANNER_RECURSE
void process_t(const class scanner_params &sp)
uint8_t ip_ttl
static print_mode_t getPrintMode(const PrintOptions &po)
scanner_info info
const uint8_t * payload
virtual void get_config(const scanner_info::config_t &c, const std::string &name, std::string *val, const std::string &help)
Definition: plugin.cpp:415
std::map< std::string, std::string > PrintOptions
static std::stringstream helpstream
uint16_t get_ip4_tcp_sport() const
const uint32_t depth
uint16_t ip_id
recursion_control_block(process_t *callback_, std::string partName_)
struct ip6_addr ip6_dst
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_, std::stringstream *xmladd)
#define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH
static uint32_t max_depth
uint16_t get_ip6_tcp_dport() const
static const size_t ip4_dst_off
static const size_t ip4_src_off
struct ip6_addr ip6_src
static const int SCANNER_RECURSE_EXPAND
static scanner_vector current_scanners
static void load_scanner_directories(const std::vector< std::string > &dirnames, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:260
static const size_t ip6_nxt_hdr_off
static void process_packet(const be13::packet_info &pi)
Definition: plugin.cpp:707
uint16_t get_ip6_tcp_sport() const
std::ostream & operator<<(std::ostream &os, const class scanner_params &sp)
static void message_enabled_scanners(scanner_params::phase_t phase, feature_recorder_set &fs)
Definition: plugin.cpp:281
void packet_callback_t(void *user, const be13::packet_info &pi)
std::string unixTimeToISODate(const uint64_t &t)
const u_char * pcap_data
histogram_defs_t histogram_defs
std::stringstream * sxml
static PrintOptions no_options
const struct pcap_pkthdr * pcap_hdr
static u_short nshort(const u_char *buf, size_t pos)
static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs)
Definition: plugin.cpp:326
static void scanners_enable(const std::string &name)
Definition: plugin.cpp:363
u_short ether_type() const
uint32_t ip4_addr_t
static void info_scanners(bool detailed_info, bool detailed_settings, scanner_t *const *scanners_builtin, const char enable_opt, const char disable_opt)
Definition: plugin.cpp:486
std::string microsoftDateToISODate(const uint64_t &time)
packet_callback_t * packet_cb
uint8_t addr8[16]
void truncate_at(std::string &line, char ch)
uint32_t ip6_un1_flow
uint8_t ip_tos
const size_t ip_datalen
static std::string helpstr()
union be13::ip6_addr::@0 addr
uint8_t get_ip6_nxt_hdr() const
std::vector< scanner_def * > scanner_vector
const struct ip6_hdr * header
std::string author
static const int CURRENT_SI_VERSION
const scanner_config * config
uint16_t ip_off
uint16_t get_ip4_tcp_dport() const
union be13::ip6_hdr::@1 ip6_ctlun
static const size_t ip6_dst_off
const uint8_t * payload
static void load_scanner_packet_handlers()
Definition: plugin.cpp:268
scanner_info(const scanner_info &i)
static void scanners_init(feature_recorder_set &fs)
Definition: plugin.cpp:339
static void set_scanner_enabled(const std::string &name, bool enable)
Definition: plugin.cpp:95
scanner_info * info
static void process_sbuf(const class scanner_params &sp)
Definition: plugin.cpp:577
std::string safe_utf16to8(std::wstring s)
#define IPPROTO_TCP
#define ETH_ALEN
const uint8_t *const ip_data
static const size_t ip4_proto_off
static void get_enabled_scanners(std::vector< std::string > &svector)
Definition: plugin.cpp:305
struct be13::ip6_hdr::@1::ip6_hdrctl ip6_un1
uint8_t ether_addr_octet[6]
static const int SCANNER_DISABLED
static void scanners_process_enable_disable_commands()
Definition: plugin.cpp:375
static void scanners_enable_all()
Definition: plugin.cpp:357
const struct ip4 * header
uint32_t addr32[4]
std::set< std::string > feature_names
const struct ip6_addr * get_ip6_dst() const
scanner_info & operator=(const scanner_info &i)
#define ONE_HUNDRED_NANO_SEC_TO_SECONDS
static void load_scanner_file(std::string fn, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:178
std::string utos(unsigned int i)
uint16_t get_ip6_plen() const
static void load_scanners(scanner_t *const *scanners_builtin, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:229
const struct timeval & ts
static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names)
Definition: plugin.cpp:715
uint16_t ip_sum
static uint32_t get_max_depth_seen()
Definition: plugin.cpp:566
static void scanners_disable(const std::string &name)
Definition: plugin.cpp:369
static void setPrintMode(PrintOptions &po, int mode)
packet_info(const int dlt, const struct pcap_pkthdr *h, const u_char *d, const struct timeval &ts_, const uint8_t *d2, size_t dl2)
std::string scanner_version
static const std::string flag_to_string(const int flag)
static const size_t ip6_plen_off
std::string description
static const size_t tcp_sport_off
const int sp_version
static void set_scanner_enabled_all(bool enable)
Definition: plugin.cpp:111
static const size_t ip6_src_off
std::string pathPrefix
uint16_t ip_len
std::string url
int isxdigit(int c)
int ip_version() const
class feature_recorder_set & fs
static void scanners_disable_all()
Definition: plugin.cpp:351
PrintOptions & print_options
uint32_t tcp_seq
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_)
virtual ~scanner_info()
const phase_t phase
static void load_scanner_directory(const std::string &dirname, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:236
const sbuf_t & sbuf
std::string dtos(double d)
static scanner_t * find_scanner(const std::string &name)
Definition: plugin.cpp:294
static void set_scanner_debug(int debug)
Definition: plugin.cpp:79
bool is_ip4_tcp() const
const struct in_addr * get_ip4_src() const
struct ip4_addr ip_src ip_dst
scanner_params(const scanner_params &sp_existing, const sbuf_t &sbuf_new)
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_, PrintOptions &print_options_)
static const int SCANNER_DEPTH_0
static const int SCANNER_NO_ALL
const uint8_t * get_ether_dhost() const
static const int SCANNER_FIND_SCANNER
const char * name
Definition: http_parser.c:465
flags
Definition: http_parser.h:216
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
Definition: checked.h:234
unsigned int uint32_t
Definition: core.h:40
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
Definition: checked.h:207
#define DLT_IEEE802
Definition: pcap_fake.h:74
#define DLT_EN10MB
Definition: pcap_fake.h:69
uint32_t caplen
Definition: pcap_fake.h:37
int c
Definition: tcpdemux.cpp:366
scanner_t * scanners_builtin[]
Definition: tcpflow.cpp:85
unsigned short uint16_t
Definition: util.h:7
unsigned char uint8_t
Definition: util.h:6
void gmtime_r(time_t *t, struct tm *tm)
Definition: utils.cpp:98