tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Loading...
Searching...
No Matches
bulk_extractor_i.h
Go to the documentation of this file.
1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3/*
4 * By design, this file can be read without reading config.h
5 * #include "config.h" must appear as the first line of your .cpp file.
6 */
7
8#ifndef PACKAGE_NAME
9#error bulk_extractor_i.h included before config.h
10#endif
11
12#ifndef BULK_EXTRACTOR_I_H
13#define BULK_EXTRACTOR_I_H
14
15#define DEBUG_PEDANTIC 0x0001 // check values more rigorously
16#define DEBUG_PRINT_STEPS 0x0002 // prints as each scanner is started
17#define DEBUG_SCANNER 0x0004 // dump all feature writes to stderr
18#define DEBUG_NO_SCANNERS 0x0008 // do not run the scanners
19#define DEBUG_DUMP_DATA 0x0010 // dump data as it is seen
20#define DEBUG_DECODING 0x0020 // debug decoders in scanner
21#define DEBUG_INFO 0x0040 // print extra info
22#define DEBUG_EXIT_EARLY 1000 // just print the size of the volume and exis
23#define DEBUG_ALLOCATE_512MiB 1002 // Allocate 512MiB, but don't set any flags
24
25/* We need netinet/in.h or windowsx.h */
26#ifdef HAVE_NETINET_IN_H
27# include <netinet/in.h>
28#endif
29
30#include <assert.h>
31
32#if defined(MINGW) || defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
33#ifndef WIN32
34#define WIN32
35#endif
36#endif
37
38#if defined(WIN32) || defined(__MINGW32__)
39# include <winsock2.h>
40# include <windows.h>
41# include <windowsx.h>
42#endif
43
/* If the byte order has not been defined (or we are on Windows),
 * assume an Intel-style little-endian machine.
 */

#if defined(WIN32) || !defined(__BYTE_ORDER)
# define __LITTLE_ENDIAN 1234
# define __BIG_ENDIAN 4321
# define __BYTE_ORDER __LITTLE_ENDIAN
#endif

/* The bit-field layouts in this header are only provided for little- and
 * big-endian targets, so refuse to compile when __BYTE_ORDER is neither.
 * (A test of the form "== little && == big" can never be true, which would
 * leave this check permanently disabled.)
 */
#if (__BYTE_ORDER != __LITTLE_ENDIAN) && (__BYTE_ORDER != __BIG_ENDIAN)
# error Invalid __BYTE_ORDER
#endif
55
56/**
57 * \addtogroup plugin_module
58 * @{
59 */
60
61/**
62 * \file
63 * bulk_extractor scanner plug_in architecture.
64 *
65 * Scanners are called with two parameters:
66 * A reference to a scanner_params (SP) object.
67 * A reference to a recursion_control_block (RCB) object.
68 *
69 * On startup, each scanner is called with a special SP and RCB.
70 * The scanners respond by setting fields in the SP and returning.
71 *
72 * When executing, once again each scanner is called with the SP and RCB.
73 * This is the only file that needs to be included for a scanner.
74 *
75 * \li \c phase_startup - scanners are loaded and register the names of the feature files they want.
76 * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs.
77 * \li \c phase_shutdown - scanners are given a chance to shutdown
78 */
79
80#ifndef __cplusplus
81# error bulk_extractor_i.h requires C++
82#endif
83
84#include "sbuf.h"
85#include "utf8.h"
86#include "utils.h" // for gmtime_r
87
88#include <vector>
89#include <set>
90#include <map>
91
92#include "feature_recorder.h"
94
95/* Network includes */
96
97/****************************************************************
98 *** pcap.h --- If we don't have it, fake it. ---
99 ***/
100#ifdef HAVE_NETINET_IF_ETHER_H
101# include <netinet/if_ether.h>
102#endif
103#ifdef HAVE_NETINET_IN_H
104# include <netinet/in.h>
105#endif
106#ifdef HAVE_NET_ETHERNET_H
107# include <net/ethernet.h> // for freebsd
108#endif
109
110
111#if defined(HAVE_LIBPCAP)
112# ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
113# pragma GCC diagnostic ignored "-Wredundant-decls"
114# endif
115# if defined(HAVE_PCAP_PCAP_H)
116# include <pcap/pcap.h>
117# define GOT_PCAP
118# endif
119# if defined(HAVE_PCAP_H) && !defined(GOT_PCAP)
120# include <pcap.h>
121# define GOT_PCAP
122# endif
123# if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP)
124# include <wpcap/pcap.h>
125# define GOT_PCAP
126# endif
127# ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
128# pragma GCC diagnostic warning "-Wredundant-decls"
129# endif
130#else
131# include "pcap_fake.h"
132#endif
133
134/**
135 * \class scanner_params
136 * The scanner params class is the primary way that the bulk_extractor framework
137 * communicates with the scanners.
138 * @param sbuf - the buffer to be scanned
139 * @param feature_names - if fs==0, add to feature_names the feature file types that this
140 * scanner records.. The names can have a /c appended to indicate
141 * that the feature files should have context enabled. Do not scan.
142 * @param fs - where the features should be saved. Must be provided if feature_names==0.
143 **/
144
145/*****************************************************************
146 *** bulk_extractor has a private implementation of IPv4 and IPv6,
147 *** UDP and TCP.
148 ***
149 *** We did this because we found slightly different versions on
150 *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin.
151 *** TCP/IP isn't changing anytime soon, and when it changes (as it
152 *** did with IPv6), these different systems all implemented it slightly
153 *** differently, and that caused a lot of problems for us.
154 *** So the BE13 API has a single implementation and it's good enough
155 *** for our uses.
156 ***/
157
158namespace be13 {
159
160#ifndef ETH_ALEN
161# define ETH_ALEN 6 // ethernet address len
162#endif
163
164#ifndef IPPROTO_TCP
165# define IPPROTO_TCP 6 /* tcp */
166#endif
167
168 struct ether_addr {
170 } __attribute__ ((__packed__));
171
172 /* 10Mb/s ethernet header */
174 uint8_t ether_dhost[ETH_ALEN]; /* destination eth addr */
175 uint8_t ether_shost[ETH_ALEN]; /* source ether addr */
176 uint16_t ether_type; /* packet type ID field */
177 } __attribute__ ((__packed__));
178
179 /* The mess below is because these items are typedefs and
180 * structs on some systems and #defines on other systems
181 * So in the interest of portability we need to define *new*
182 * structures that are only used here
183 */
184
185 typedef uint32_t ip4_addr_t; // historical
186
187 // on windows we use the definition that's in winsock
188 struct ip4_addr {
190 };
191
192 /*
193 * Structure of an internet header, naked of options.
194 */
195 struct ip4 {
196#if __BYTE_ORDER == __LITTLE_ENDIAN
197 uint8_t ip_hl:4; /* header length */
198 uint8_t ip_v:4; /* version */
199#endif
200#if __BYTE_ORDER == __BIG_ENDIAN
201 uint8_t ip_v:4; /* version */
202 uint8_t ip_hl:4; /* header length */
203#endif
204 uint8_t ip_tos; /* type of service */
205 uint16_t ip_len; /* total length */
206 uint16_t ip_id; /* identification */
207 uint16_t ip_off; /* fragment offset field */
208#define IP_RF 0x8000 /* reserved fragment flag */
209#define IP_DF 0x4000 /* dont fragment flag */
210#define IP_MF 0x2000 /* more fragments flag */
211#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
212 uint8_t ip_ttl; /* time to live */
213 uint8_t ip_p; /* protocol */
214 uint16_t ip_sum; /* checksum */
215 struct ip4_addr ip_src, ip_dst; /* source and dest address */
216 } __attribute__ ((__packed__));
217
218 struct ip4_dgram {
219 const struct ip4 *header;
222 };
223
224 /*
225 * IPv6 header structure
226 */
227 struct ip6_addr { // our own private ipv6 definition
228 union {
229 uint8_t addr8[16]; // three ways to get the data
231 uint32_t addr32[4];
232 } addr; /* 128-bit IP6 address */
233 };
234 struct ip6_hdr {
235 union {
236 struct ip6_hdrctl {
237 uint32_t ip6_un1_flow; /* 20 bits of flow-ID */
238 uint16_t ip6_un1_plen; /* payload length */
239 uint8_t ip6_un1_nxt; /* next header */
240 uint8_t ip6_un1_hlim; /* hop limit */
242 uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */
244 struct ip6_addr ip6_src; /* source address */
245 struct ip6_addr ip6_dst; /* destination address */
246 } __attribute__((__packed__));
247
248 struct ip6_dgram {
249 const struct ip6_hdr *header;
252 };
253
254 /*
255 * TCP header.
256 * Per RFC 793, September, 1981.
257 */
258 typedef uint32_t tcp_seq;
259 struct tcphdr {
260 uint16_t th_sport; /* source port */
261 uint16_t th_dport; /* destination port */
262 tcp_seq th_seq; /* sequence number */
263 tcp_seq th_ack; /* acknowledgement number */
264# if __BYTE_ORDER == __LITTLE_ENDIAN
265 uint8_t th_x2:4; /* (unused) */
266 uint8_t th_off:4; /* data offset */
267# endif
268# if __BYTE_ORDER == __BIG_ENDIAN
269 uint8_t th_off:4; /* data offset */
270 uint8_t th_x2:4; /* (unused) */
271# endif
273# define TH_FIN 0x01
274# define TH_SYN 0x02
275# define TH_RST 0x04
276# define TH_PUSH 0x08
277# define TH_ACK 0x10
278# define TH_URG 0x20
279 uint16_t th_win; /* window */
280 uint16_t th_sum; /* checksum */
281 uint16_t th_urp; /* urgent pointer */
282};
283/*
284 * The packet_info structure records packets after they are read from the pcap library.
285 * It preserves the original pcap information and information decoded from the MAC and
286 * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11
287 * interfaces. However it does not preserve the full radiotap information.
288 *
289 * packet_info is created to make it easier to write network forensic software. It encapsulates
290 * much of the common knowledge needed to operate on packet-based IP networks.
291 *
292 * @param ts - the actual packet time to use (adjusted)
293 * @param pcap_data - Original data offset point from pcap
294 * @param data - the actual packet data, minus the MAC layer
295 * @param datalen - How much data is available at the data pointer
296 *
297 */
299public:
300 // IPv4 header offsets
301 static const size_t ip4_proto_off = 9;
302 static const size_t ip4_src_off = 12;
303 static const size_t ip4_dst_off = 16;
304 // IPv6 header offsets
305 static const size_t ip6_nxt_hdr_off = 6;
306 static const size_t ip6_plen_off = 4;
307 static const size_t ip6_src_off = 8;
308 static const size_t ip6_dst_off = 24;
309 // TCP header offsets
310 static const size_t tcp_sport_off = 0;
311 static const size_t tcp_dport_off = 2;
312
313 class frame_too_short : public std::logic_error {
314 public:
316 std::logic_error("frame too short to contain requisite network structures") {}
317 };
318
319 enum vlan_t {NO_VLAN=-1};
320 /** create a packet, usually an IP packet.
321 * @param d - start of MAC packet
322 * @param d2 - start of IP data
323 */
324 packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d,
325 const struct timeval &ts_,const uint8_t *d2,size_t dl2):
326 pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){}
327 packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d):
328 pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){}
329
330 const int pcap_dlt; // data link type; needed by libpcap, not provided
331 const struct pcap_pkthdr *pcap_hdr; // provided by libpcap
332 const u_char *pcap_data; // provided by libpcap; where the MAC layer begins
333 const struct timeval &ts; // when packet received; possibly modified before packet_info created
334 const uint8_t *const ip_data; // pointer to where ip data begins
335 const size_t ip_datalen; // length of ip data
336
337 static u_short nshort(const u_char *buf,size_t pos); // return a network byte order short at offset pos
338 int ip_version() const; // returns 4, 6 or 0
339 u_short ether_type() const; // returns 0 if not IEEE802, otherwise returns ether_type
340 int vlan() const; // returns NO_VLAN if not IEEE802 or not VLAN, othererwise VID
341 const uint8_t *get_ether_dhost() const; // returns a pointer to ether dhost if ether packet
342 const uint8_t *get_ether_shost() const; // returns a pointer to ether shost if ether packet
343
344 // packet typing
345 bool is_ip4() const;
346 bool is_ip6() const;
347 bool is_ip4_tcp() const;
348 bool is_ip6_tcp() const;
349 // packet extraction
350 // IPv4 - return pointers to fields or throws frame_too_short exception
351 const struct in_addr *get_ip4_src() const;
352 const struct in_addr *get_ip4_dst() const;
353 uint8_t get_ip4_proto() const;
354 // IPv6
355 uint8_t get_ip6_nxt_hdr() const;
356 uint16_t get_ip6_plen() const;
357 const struct ip6_addr *get_ip6_src() const;
358 const struct ip6_addr *get_ip6_dst() const;
359 // TCP
364};
365
366#ifdef DLT_IEEE802
367 inline u_short packet_info::ether_type() const
368 {
370 const struct ether_header *eth_header = (struct ether_header *) pcap_data;
371 return ntohs(eth_header->ether_type);
372 }
373 return 0;
374 }
375#endif
376
377#ifndef ETHERTYPE_PUP
378#define ETHERTYPE_PUP 0x0200 /* Xerox PUP */
379#endif
380
381#ifndef ETHERTYPE_SPRITE
382#define ETHERTYPE_SPRITE 0x0500 /* Sprite */
383#endif
384
385#ifndef ETHERTYPE_IP
386#define ETHERTYPE_IP 0x0800 /* IP */
387#endif
388
389#ifndef ETHERTYPE_ARP
390#define ETHERTYPE_ARP 0x0806 /* Address resolution */
391#endif
392
393#ifndef ETHERTYPE_REVARP
394#define ETHERTYPE_REVARP 0x8035 /* Reverse ARP */
395#endif
396
397#ifndef ETHERTYPE_AT
398#define ETHERTYPE_AT 0x809B /* AppleTalk protocol */
399#endif
400
401#ifndef ETHERTYPE_AARP
402#define ETHERTYPE_AARP 0x80F3 /* AppleTalk ARP */
403#endif
404
405#ifndef ETHERTYPE_VLAN
406#define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging */
407#endif
408
409#ifndef ETHERTYPE_IPX
410#define ETHERTYPE_IPX 0x8137 /* IPX */
411#endif
412
413#ifndef ETHERTYPE_IPV6
414#define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */
415#endif
416
417#ifndef ETHERTYPE_LOOPBACK
418#define ETHERTYPE_LOOPBACK 0x9000 /* used to test interfaces */
419#endif
420
421
422 inline u_short packet_info::nshort(const u_char *buf,size_t pos)
423 {
424 return (buf[pos]<<8) | (buf[pos+1]);
425 }
426
427 inline int packet_info::vlan() const
428 {
430 return nshort(pcap_data,sizeof(struct ether_header));
431 }
432 return -1;
433 }
434
435 inline int packet_info::ip_version() const
436 {
437 /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */
438 if (ip_datalen >= sizeof(struct ip4)) {
439 const struct ip4 *ip_header = (struct ip4 *) ip_data;
440 switch(ip_header->ip_v){
441 case 4: return 4;
442 case 6: return 6;
443 }
444 }
445 return 0;
446 }
447
448 // packet typing
449
450 inline bool packet_info::is_ip4() const
451 {
452 return ip_version() == 4;
453 }
454
455 inline bool packet_info::is_ip6() const
456 {
457 return ip_version() == 6;
458 }
459
460 inline bool packet_info::is_ip4_tcp() const
461 {
462 if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) {
463 return false;
464 }
465 return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP;
466 return false;
467 }
468
469 inline bool packet_info::is_ip6_tcp() const
470 {
471 if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
472 return false;
473 }
474 return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP;
475 }
476
477 // packet extraction
478 // precondition: the appropriate packet type function must return true before using these functions.
479 // example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport()
480
481 // Get ether addresses; should this handle vlan and such?
483 {
484 if(pcap_hdr->caplen < sizeof(struct ether_addr)){
485 throw new frame_too_short();
486 }
487 return ((const struct ether_header *)pcap_data)->ether_dhost;
488 }
489
491 {
492 if(pcap_hdr->caplen < sizeof(struct ether_addr)){
493 throw new frame_too_short();
494 }
495 return ((const struct ether_header *)pcap_data)->ether_shost;
496 }
497
498 // IPv4
499# ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
500# pragma GCC diagnostic ignored "-Wcast-align"
501# endif
502 inline const struct in_addr *packet_info::get_ip4_src() const
503 {
504 if(ip_datalen < sizeof(struct ip4)) {
505 throw new frame_too_short();
506 }
507 return (const struct in_addr *) ip_data + ip4_src_off;
508 }
509 inline const struct in_addr *packet_info::get_ip4_dst() const
510 {
511 if(ip_datalen < sizeof(struct ip4)) {
512 throw new frame_too_short();
513 }
514 return (const struct in_addr *) ip_data + ip4_dst_off;
515 }
516# ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
517# pragma GCC diagnostic warning "-Wcast-align"
518# endif
520 {
521 if(ip_datalen < sizeof(struct ip4)) {
522 throw new frame_too_short();
523 }
524 return *((uint8_t *) (ip_data + ip4_proto_off));
525 }
526 // IPv6
528 {
529 if(ip_datalen < sizeof(struct ip6_hdr)) {
530 throw new frame_too_short();
531 }
532 return *((uint8_t *) (ip_data + ip6_nxt_hdr_off));
533 }
535 {
536 if(ip_datalen < sizeof(struct ip6_hdr)) {
537 throw new frame_too_short();
538 }
539 //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off)));
541 }
542# ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
543# pragma GCC diagnostic ignored "-Wcast-align"
544# endif
545 inline const struct ip6_addr *packet_info::get_ip6_src() const
546 {
547 if(ip_datalen < sizeof(struct ip6_hdr)) {
548 throw new frame_too_short();
549 }
550 return (const struct ip6_addr *) ip_data + ip6_src_off;
551 }
552 inline const struct ip6_addr *packet_info::get_ip6_dst() const
553 {
554 if(ip_datalen < sizeof(struct ip6_hdr)) {
555 throw new frame_too_short();
556 }
557 return (const struct ip6_addr *) ip_data + ip6_dst_off;
558 }
559# ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
560# pragma GCC diagnostic warning "-Wcast-align"
561# endif
562
563 // TCP
565 {
566 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
567 throw new frame_too_short();
568 }
569 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off)));
570 return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off);
571 }
573 {
574 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
575 throw new frame_too_short();
576 }
577 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off)));
578 return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); //
579
580 }
582 {
583 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
584 throw new frame_too_short();
585 }
586 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off)));
587 return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); //
588 }
590 {
591 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
592 throw new frame_too_short();
593 }
594 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off)));
595 return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); //
596 }
597};
598
599
/** Function type of a scanner: it receives the scanner parameters and a recursion control block. */
typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb);
/** Function type of the callback stored in a recursion_control_block; it receives a scanner_params. */
typedef void process_t(const class scanner_params &sp);
/** Function type of a network-packet callback.
 *  `user` is an opaque pointer - presumably the packet_user value a scanner
 *  registers in scanner_info; confirm against the plugin code. */
typedef void packet_callback_t(void *user,const be13::packet_info &pi);
603
604/** scanner_info gets filled in by the scanner to tell the caller about the scanner.
605 *
606 */
608private:
609 static std::stringstream helpstream; // where scanner info help messages are saved.
610
611 // default copy construction and assignment are meaningless
612 // and not implemented
615 public:
616 static std::string helpstr(){return helpstream.str();}
617 typedef std::map<std::string,std::string> config_t; // configuration for scanner passed in
618
619 /* scanner flags */
620 static const int SCANNER_DISABLED = 0x001; // v1: enabled by default
621 static const int SCANNER_NO_USAGE = 0x002; // v1: do not show scanner in usage
622 static const int SCANNER_NO_ALL = 0x004; // v2: do not enable with -eall
623 static const int SCANNER_FIND_SCANNER = 0x008; // v2: this scanner uses the find_list
624 static const int SCANNER_RECURSE = 0x010; // v3: this scanner will recurse
625 static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size
626 static const int SCANNER_WANTS_NGRAMS = 0x040; // v3: Scanner gets buffers that are constant n-grams
627 static const int SCANNER_FAST_FIND = 0x080; // v3: This scanner is a very fast FIND scanner
628 static const int SCANNER_DEPTH_0 = 0x100; // v3: scanner only runs at depth 0 by default
629 static const int CURRENT_SI_VERSION = 4;
630
631 static const std::string flag_to_string(const int flag){
632 std::string ret;
633 if(flag==0) ret += "NONE ";
634 if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED ";
635 if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE ";
636 if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL ";
637 if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER ";
638 if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE ";
639 if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND ";
640 if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS ";
641 return ret;
642 }
643
644 /* Global config is passed to each scanner as a pointer when it is loaded.
645 * Scanner histograms are added to 'histograms' by machinery.
646 */
649 virtual ~scanner_config(){}
650 config_t namevals; // v3: (input) name=val map
651 int debug; // v3: (input) current debug level
652 };
653
654 // never change the order or delete old fields, or else you will
655 // break backwards compatibility
659 /* PASSED FROM SCANNER to API: */
660 int si_version; // version number for this structure
661 std::string name; // v1: (output) scanner name
662 std::string author; // v1: (output) who wrote me?
663 std::string description; // v1: (output) what do I do?
664 std::string url; // v1: (output) where I come from
665 std::string scanner_version; // v1: (output) version for the scanner
666 uint64_t flags; // v1: (output) flags
667 std::set<std::string> feature_names; // v1: (output) features I need
668 histogram_defs_t histogram_defs; // v1: (output) histogram definition info
669 void *packet_user; // v2: (output) data for network callback
670 packet_callback_t *packet_cb; // v2: (output) callback for processing network packets, or NULL
671
672 /* PASSED FROM API TO SCANNER; access with functions below */
673 const scanner_config *config; // v3: (intput to scanner) config
674
675 // These methods are implemented in the plugin system for the scanner to get config information.
676 // The get_config methods should be called on the si object during PHASE_STARTUP
677 virtual void get_config(const scanner_info::config_t &c,
678 const std::string &name,std::string *val,const std::string &help);
679 virtual void get_config(const std::string &name,std::string *val,const std::string &help);
680 virtual void get_config(const std::string &name,uint64_t *val,const std::string &help);
681 virtual void get_config(const std::string &name,int32_t *val,const std::string &help);
682 virtual void get_config(const std::string &name,uint32_t *val,const std::string &help);
683 virtual void get_config(const std::string &name,uint16_t *val,const std::string &help);
684 virtual void get_config(const std::string &name,uint8_t *val,const std::string &help);
685#ifdef __APPLE__
686 virtual void get_config(const std::string &name,size_t *val,const std::string &help);
687#define HAVE_GET_CONFIG_SIZE_T
688#endif
689 virtual void get_config(const std::string &name,bool *val,const std::string &help);
690 virtual ~scanner_info(){};
691};
692#include <map>
693/**
694 * The scanner_params class is a way for sending the scanner parameters
695 * for this particular sbuf to be scanned.
696 */
697
699 public:
701 static const int CURRENT_SP_VERSION=3;
702
703 typedef std::map<std::string,std::string> PrintOptions;
705 PrintOptions::const_iterator p = po.find("print_mode_t");
706 if(p != po.end()){
707 if(p->second=="MODE_NONE") return MODE_NONE;
708 if(p->second=="MODE_HEX") return MODE_HEX;
709 if(p->second=="MODE_RAW") return MODE_RAW;
710 if(p->second=="MODE_HTTP") return MODE_HTTP;
711 }
712 return MODE_NONE;
713 }
714 static void setPrintMode(PrintOptions &po,int mode){
715 switch(mode){
716 default:
717 case MODE_NONE:po["print_mode_t"]="MODE_NONE";return;
718 case MODE_HEX:po["print_mode_t"]="MODE_HEX";return;
719 case MODE_RAW:po["print_mode_t"]="MODE_RAW";return;
720 case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return;
721 }
722 }
723
724 // phase_t specifies when the scanner is being called
725 typedef enum {
727 PHASE_STARTUP = 0, // called in main thread when scanner loads; called on EVERY scanner (called for help)
728 PHASE_INIT = 3, // called in main thread for every ENABLED scanner after all scanners loaded
729 PHASE_THREAD_BEFORE_SCAN = 4, // called in worker thread for every ENABLED scanner before first scan
730 PHASE_SCAN = 1, // called in worker thread for every ENABLED scanner to scan an sbuf
731 PHASE_SHUTDOWN = 2, // called in main thread for every ENABLED scanner when scanner is shutdown
732 } phase_t ;
733 static PrintOptions no_options; // in common.cpp
734
735 /********************
736 *** CONSTRUCTORS ***
737 ********************/
738
739 /* A scanner params with all of the instance variables, typically for scanning */
740 scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,
741 PrintOptions &print_options_):
743 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){
744 }
745
746 /* A scanner params with no print options */
747 scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_):
749 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){
750 }
751
752 /* A scanner params with no print options but an xmlstream */
753 scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd):
755 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){
756 }
757
758 /** Construct a scanner_params for recursion from an existing sp and a new sbuf.
759 * Defaults to phase1
760 */
761 scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new):
763 sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1),
764 print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){
765 assert(sp_existing.sp_version==CURRENT_SP_VERSION);
766 };
767
768 /**
769 * A scanner params with an empty info
770 */
771
772 /**************************
773 *** INSTANCE VARIABLES ***
774 **************************/
775
776 const int sp_version; /* version number of this structure */
777 const phase_t phase; /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */
778 const sbuf_t &sbuf; /* v1: what to scan / only valid in SCAN_PHASE */
779 class feature_recorder_set &fs; /* v1: where to put the results / only valid in SCAN_PHASE */
780 const uint32_t depth; /* v1: how far down are we? / only valid in SCAN_PHASE */
781
782 PrintOptions &print_options; /* v1: how to print / NOT USED IN SCANNERS */
783 scanner_info *info; /* v2: set/get parameters on startup, hasher */
784 std::stringstream *sxml; /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */
785};
786
787
788inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){
789 os << "scanner_params(" << sp.sbuf << ")";
790 return os;
791};
792
794 public:
795/**
796 * @param callback_ - the function to call back
797 * @param partName_ - the part of the forensic path processed by this scanner.
798 */
799 recursion_control_block(process_t *callback_,std::string partName_):
800 callback(callback_),partName(partName_){}
802 std::string partName; /* eg "ZIP", "GZIP" */
803};
804
805/* plugin.cpp. This will become a class... */
807public:;
808 static uint32_t max_depth; // maximum depth to scan for the scanners
809 static uint32_t max_ngram; // maximum ngram size to change
811 scanner_t *scanner; // pointer to the primary entry point
812 bool enabled; // is enabled?
813 scanner_info info; // info block sent to and returned by scanner
814 std::string pathPrefix; /* path prefix for recursive scanners */
815};
816
817namespace be13 {
818 /* plugin.cpp */
819
820 struct plugin {
821 typedef std::vector<scanner_def *> scanner_vector;
822 static scanner_vector current_scanners; // current scanners
823 static bool dup_data_alerts; // notify when duplicate data is not processed
824 static uint64_t dup_data_encountered; // amount of dup data encountered
825
826 static void set_scanner_debug(int debug);
827
828 static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner
829 static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc); // load a scanner from a file
830 static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins
831 static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory
832 static void load_scanner_directories(const std::vector<std::string> &dirnames,const scanner_info::scanner_config &sc);
833 static void load_scanner_packet_handlers();
834
835 // send every enabled scanner the phase message
837
838 // returns the named scanner, or 0 if no scanner of that name
839 static scanner_t *find_scanner(const std::string &name);
840 static void get_enabled_scanners(std::vector<std::string> &svector); // put the enabled scanners into the vector
842 static bool find_scanner_enabled(); // return true if a find scanner is enabled
843
844 // print info about the scanners:
845 static void scanners_disable_all(); // saves a command to disable all
846 static void scanners_enable_all(); // enable all of them
847 static void set_scanner_enabled(const std::string &name,bool enable);
848 static void set_scanner_enabled_all(bool enable);
849 static void scanners_enable(const std::string &name); // saves a command to enable this scanner
850 static void scanners_disable(const std::string &name); // saves a command to disable this scanner
851 static void scanners_process_enable_disable_commands(); // process the enable/disable and config commands
852 static void scanners_init(feature_recorder_set &fs); // init the scanners
853
854 static void info_scanners(bool detailed_info,
855 bool detailed_settings,
856 scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt);
857
858
859 /* Run the phases on the scanners */
860 static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown
861 static uint32_t get_max_depth_seen();
862 static void process_sbuf(const class scanner_params &sp); /* process for feature extraction */
863 static void process_packet(const be13::packet_info &pi);
864
865 /* recorders */
866 static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names);
867
868 };
869};
870
/** Convert a signed integer to its text form using stream formatting. */
inline std::string itos(int i)
{
    std::stringstream out;
    out << i;
    return out.str();
}
/** Convert a double to its text form using default stream formatting. */
inline std::string dtos(double d)
{
    std::stringstream out;
    out << d;
    return out.str();
}
/** Convert an unsigned int to its text form using stream formatting. */
inline std::string utos(unsigned int i)
{
    std::stringstream out;
    out << i;
    return out.str();
}
/** Convert a 64-bit unsigned value to its text form using stream formatting. */
inline std::string utos(uint64_t i)
{
    std::stringstream out;
    out << i;
    return out.str();
}
/** Convert a 16-bit unsigned value to its text form using stream formatting. */
inline std::string utos(uint16_t i)
{
    std::stringstream out;
    out << i;
    return out.str();
}
876inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up
877 std::string utf8_line;
878 try {
879 utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line));
880 } catch(utf8::invalid_utf16 const &){
881 /* Exception thrown: bad UTF16 encoding */
882 utf8_line = "";
883 }
884 return utf8_line;
885}
886
887inline std::wstring safe_utf8to16(std::string s){ // needs to be cleaned up
888 std::wstring utf16_line;
889 try {
890 utf8::utf8to16(s.begin(),s.end(),back_inserter(utf16_line));
891 } catch(utf8::invalid_utf8 const &){
892 /* Exception thrown: bad UTF16 encoding */
893 utf16_line = L"";
894 }
895 return utf16_line;
896}
897
/**
 * Cut a string off at the first occurrence of a character.
 * @param line - string to shorten in place; left untouched when ch is absent
 * @param ch - character to look for; it and everything after it are removed
 */
inline void truncate_at(std::string &line, char ch) {
    const std::string::size_type cut = line.find(ch);
    if (cut == std::string::npos) {
        return;                 // ch does not occur: nothing to remove
    }
    line.erase(cut);
}
903
#ifndef HAVE_ISXDIGIT
/**
 * Fallback used when HAVE_ISXDIGIT is not defined.
 * @return 1 when c is one of '0'-'9', 'a'-'f' or 'A'-'F', otherwise 0
 */
inline int isxdigit(int c)
{
    if (c >= '0' && c <= '9') return 1;   // decimal digit
    if (c >= 'a' && c <= 'f') return 1;   // lower-case hex letter
    if (c >= 'A' && c <= 'F') return 1;   // upper-case hex letter
    return 0;
}
#endif
910
911/* Useful functions for scanners */
912#define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000
913#define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL
914/*
915 * 11644473600 is the number of seconds between the Win32 epoch
916 * and the Unix epoch.
917 *
918 * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992
919 * gmtime_r() is Linux-specific. You'll find a copy in util.cpp for Windows.
920 */
921
922inline std::string microsoftDateToISODate(const uint64_t &time)
923{
925
926 struct tm time_tm;
927 gmtime_r(&tmp, &time_tm);
928 char buf[256];
929 strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time
930 return std::string(buf);
931}
932
/**
 * Convert a Unix timestamp to an ISO 8601 string in UTC.
 * @param t - seconds since the Unix epoch
 * @return text of the form "YYYY-MM-DDTHH:MM:SSZ"
 * The results of gmtime_r() and strftime() are not checked.
 */
inline std::string unixTimeToISODate(const uint64_t &t)
{
    char iso[256];
    time_t seconds=t;
    struct tm utc;
    gmtime_r(&seconds, &utc);
    strftime(iso, sizeof(iso), "%Y-%m-%dT%H:%M:%SZ", &utc); // Zulu time
    return std::string(iso);
}
943
/**
 * Many internal windows and Linux structures require a valid printable name in ASCII.
 * @return true when every byte of name is in the range 0x20..0x7e
 *         (no high bit set, no control character, no DEL); an empty name is accepted
 */
inline bool validASCIIName(const std::string &name)
{
    for(std::string::const_iterator it = name.begin(); it != name.end(); ++it){
        const unsigned char ch = static_cast<unsigned char>(*it);
        const bool printable = (ch >= ' ') && (ch < 0x7f);
        if(!printable) return false;
    }
    return true;
}
954
955#endif
Definition: sbuf.h:221
static int debug
std::set< histogram_def > histogram_defs_t
std::set< std::string > feature_file_names_t
static uint32_t max_ngram
std::map< std::string, std::string > config_t
static bool dup_data_alerts
static void phase_shutdown(feature_recorder_set &fs, std::stringstream *sxml=0)
Definition: plugin.cpp:395
scanner_t * scanner
bool is_ip6_tcp() const
static void load_scanner(scanner_t scanner, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:136
bool validASCIIName(const std::string &name)
std::string name
static bool find_scanner_enabled()
Definition: plugin.cpp:314
static const int SCANNER_NO_USAGE
static const int SCANNER_WANTS_NGRAMS
#define ETHERTYPE_VLAN
uint16_t addr16[8]
static const size_t tcp_dport_off
static const int SCANNER_FAST_FIND
const uint8_t * get_ether_shost() const
std::wstring safe_utf8to16(std::string s)
static const int CURRENT_SP_VERSION
uint16_t ip6_un1_plen
scanner_info & operator=(const scanner_info &i)
packet_info(const int dlt, const struct pcap_pkthdr *h, const u_char *d)
void scanner_t(const class scanner_params &sp, const class recursion_control_block &rcb)
const struct ip6_addr * get_ip6_src() const
std::string itos(int i)
uint8_t get_ip4_proto() const
static uint64_t dup_data_encountered
const struct in_addr * get_ip4_dst() const
static const int SCANNER_RECURSE
void process_t(const class scanner_params &sp)
uint8_t ip_ttl
static print_mode_t getPrintMode(const PrintOptions &po)
scanner_info info
const uint8_t * payload
virtual void get_config(const scanner_info::config_t &c, const std::string &name, std::string *val, const std::string &help)
Definition: plugin.cpp:415
std::map< std::string, std::string > PrintOptions
static std::stringstream helpstream
uint16_t get_ip4_tcp_sport() const
const uint32_t depth
uint16_t ip_id
recursion_control_block(process_t *callback_, std::string partName_)
struct ip6_addr ip6_dst
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_, std::stringstream *xmladd)
#define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH
static uint32_t max_depth
uint16_t get_ip6_tcp_dport() const
static const size_t ip4_dst_off
static const size_t ip4_src_off
struct ip6_addr ip6_src
static const int SCANNER_RECURSE_EXPAND
static scanner_vector current_scanners
static void load_scanner_directories(const std::vector< std::string > &dirnames, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:260
static const size_t ip6_nxt_hdr_off
static void process_packet(const be13::packet_info &pi)
Definition: plugin.cpp:707
uint16_t get_ip6_tcp_sport() const
static void message_enabled_scanners(scanner_params::phase_t phase, feature_recorder_set &fs)
Definition: plugin.cpp:281
void packet_callback_t(void *user, const be13::packet_info &pi)
std::string unixTimeToISODate(const uint64_t &t)
const u_char * pcap_data
histogram_defs_t histogram_defs
std::stringstream * sxml
static PrintOptions no_options
const struct pcap_pkthdr * pcap_hdr
static u_short nshort(const u_char *buf, size_t pos)
static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs)
Definition: plugin.cpp:326
static void scanners_enable(const std::string &name)
Definition: plugin.cpp:363
u_short ether_type() const
uint32_t ip4_addr_t
static void info_scanners(bool detailed_info, bool detailed_settings, scanner_t *const *scanners_builtin, const char enable_opt, const char disable_opt)
Definition: plugin.cpp:486
std::string microsoftDateToISODate(const uint64_t &time)
packet_callback_t * packet_cb
uint8_t addr8[16]
void truncate_at(std::string &line, char ch)
uint32_t ip6_un1_flow
uint8_t ip_tos
const size_t ip_datalen
static std::string helpstr()
std::ostream & operator<<(std::ostream &os, const class scanner_params &sp)
union be13::ip6_addr::@0 addr
uint8_t get_ip6_nxt_hdr() const
std::vector< scanner_def * > scanner_vector
const struct ip6_hdr * header
std::string author
static const int CURRENT_SI_VERSION
const scanner_config * config
uint16_t ip_off
uint16_t get_ip4_tcp_dport() const
union be13::ip6_hdr::@1 ip6_ctlun
static const size_t ip6_dst_off
const uint8_t * payload
static void load_scanner_packet_handlers()
Definition: plugin.cpp:268
scanner_info(const scanner_info &i)
static void scanners_init(feature_recorder_set &fs)
Definition: plugin.cpp:339
static void set_scanner_enabled(const std::string &name, bool enable)
Definition: plugin.cpp:95
scanner_info * info
static void process_sbuf(const class scanner_params &sp)
Definition: plugin.cpp:577
std::string safe_utf16to8(std::wstring s)
#define IPPROTO_TCP
#define ETH_ALEN
const uint8_t *const ip_data
static const size_t ip4_proto_off
static void get_enabled_scanners(std::vector< std::string > &svector)
Definition: plugin.cpp:305
struct be13::ip6_hdr::@1::ip6_hdrctl ip6_un1
uint8_t ether_addr_octet[6]
static const int SCANNER_DISABLED
static void scanners_process_enable_disable_commands()
Definition: plugin.cpp:375
static void scanners_enable_all()
Definition: plugin.cpp:357
const struct ip4 * header
uint32_t addr32[4]
std::set< std::string > feature_names
const struct ip6_addr * get_ip6_dst() const
#define ONE_HUNDRED_NANO_SEC_TO_SECONDS
static void load_scanner_file(std::string fn, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:178
std::string utos(unsigned int i)
uint16_t get_ip6_plen() const
static void load_scanners(scanner_t *const *scanners_builtin, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:229
const struct timeval & ts
static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names)
Definition: plugin.cpp:715
uint16_t ip_sum
static uint32_t get_max_depth_seen()
Definition: plugin.cpp:566
static void scanners_disable(const std::string &name)
Definition: plugin.cpp:369
static void setPrintMode(PrintOptions &po, int mode)
packet_info(const int dlt, const struct pcap_pkthdr *h, const u_char *d, const struct timeval &ts_, const uint8_t *d2, size_t dl2)
std::string scanner_version
static const std::string flag_to_string(const int flag)
static const size_t ip6_plen_off
std::string description
static const size_t tcp_sport_off
const int sp_version
static void set_scanner_enabled_all(bool enable)
Definition: plugin.cpp:111
static const size_t ip6_src_off
std::string pathPrefix
uint16_t ip_len
std::string url
int isxdigit(int c)
int ip_version() const
class feature_recorder_set & fs
static void scanners_disable_all()
Definition: plugin.cpp:351
PrintOptions & print_options
uint32_t tcp_seq
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_)
virtual ~scanner_info()
const phase_t phase
static void load_scanner_directory(const std::string &dirname, const scanner_info::scanner_config &sc)
Definition: plugin.cpp:236
const sbuf_t & sbuf
std::string dtos(double d)
static scanner_t * find_scanner(const std::string &name)
Definition: plugin.cpp:294
static void set_scanner_debug(int debug)
Definition: plugin.cpp:79
bool is_ip4_tcp() const
const struct in_addr * get_ip4_src() const
struct ip4_addr ip_src ip_dst
scanner_params(const scanner_params &sp_existing, const sbuf_t &sbuf_new)
scanner_params(phase_t phase_, const sbuf_t &sbuf_, class feature_recorder_set &fs_, PrintOptions &print_options_)
static const int SCANNER_DEPTH_0
static const int SCANNER_NO_ALL
const uint8_t * get_ether_dhost() const
static const int SCANNER_FIND_SCANNER
const char * name
Definition: http_parser.c:465
flags
Definition: http_parser.h:216
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
Definition: checked.h:234
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
Definition: checked.h:207
#define DLT_IEEE802
Definition: pcap_fake.h:74
#define DLT_EN10MB
Definition: pcap_fake.h:69
uint32_t caplen
Definition: pcap_fake.h:37
int c
Definition: tcpdemux.cpp:366
scanner_t * scanners_builtin[]
Definition: tcpflow.cpp:85
unsigned short uint16_t
Definition: util.h:7
unsigned char uint8_t
Definition: util.h:6
void gmtime_r(time_t *t, struct tm *tm)
Definition: utils.cpp:98