"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/bulk_extractor_i.h" (19 Feb 2021, 37321 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "bulk_extractor_i.h" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.5.0_vs_1.6.1.

    1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 
    3 /*
    4  * By design, this file can be read without reading config.h
    5  * #include "config.h" must appear as the first line of your .cpp file.
    6  */
    7 
    8 #ifndef PACKAGE_NAME
    9 #error bulk_extractor_i.h included before config.h
   10 #endif
   11 
   12 #ifndef BULK_EXTRACTOR_I_H
   13 #define BULK_EXTRACTOR_I_H
   14 
   15 #define DEBUG_PEDANTIC    0x0001        // check values more rigorously
   16 #define DEBUG_PRINT_STEPS 0x0002        // prints as each scanner is started
   17 #define DEBUG_SCANNER     0x0004        // dump all feature writes to stderr
   18 #define DEBUG_NO_SCANNERS 0x0008        // do not run the scanners
   19 #define DEBUG_DUMP_DATA   0x0010        // dump data as it is seen
   20 #define DEBUG_DECODING    0x0020        // debug decoders in scanner
   21 #define DEBUG_INFO        0x0040        // print extra info
      /* The two values below are plain numbers, not single-bit masks like the ones above. */
   22 #define DEBUG_EXIT_EARLY  1000          // just print the size of the volume and exit
   23 #define DEBUG_ALLOCATE_512MiB 1002      // Allocate 512MiB, but don't set any flags
   24 
   25 /* We need netinet/in.h or windowsx.h */
   26 #ifdef HAVE_NETINET_IN_H
   27 # include <netinet/in.h>
   28 #endif
   29 
   30 #include <assert.h>
   31 
   32 #if defined(MINGW) || defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
   33 #ifndef WIN32
   34 #define WIN32
   35 #endif
   36 #endif
   37 
   38 #if defined(WIN32) || defined(__MINGW32__)
   39 #  include <winsock2.h>
   40 #  include <windows.h>
   41 #  include <windowsx.h>
   42 #endif
   43 
   44 /* If byte_order hasn't been defined, assume its intel */
   45 
   46 #if defined(WIN32) || !defined(__BYTE_ORDER)
   47 #  define __LITTLE_ENDIAN 1234
   48 #  define __BIG_ENDIAN    4321
   49 #  define __BYTE_ORDER __LITTLE_ENDIAN
   50 #endif
   51 
   52 #if (__BYTE_ORDER == __LITTLE_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)
   53 #  error Invalid __BYTE_ORDER
   54 #endif
   55 
   56 /**
   57  * \addtogroup plugin_module
   58  * @{
   59  */
   60 
   61 /**
   62  * \file
   63  * bulk_extractor scanner plug_in architecture.
   64  *
   65  * Scanners are called with two parameters:
   66  * A reference to a scanner_params (SP) object.
   67  * A reference to a recursion_control_block (RCB) object.
   68  *
   69  * On startup, each scanner is called with a special SP and RCB.
   70  * The scanners respond by setting fields in the SP and returning.
   71  *
   72  * When executing, once again each scanner is called with the SP and RCB.
   73  * This is the only file that needs to be included for a scanner.
   74  *
   75  * \li \c phase_startup - scanners are loaded and register the names of the feature files they want.
   76  * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs.
   77  * \li \c phase_shutdown - scanners are given a chance to shutdown
   78  */
   79 
   80 #ifndef __cplusplus
   81 # error bulk_extractor_i.h requires C++
   82 #endif
   83 
   84 #include "sbuf.h"
   85 #include "utf8.h"
   86 #include "utils.h"                      // for gmtime_r
   87 
   88 #include <vector>
   89 #include <set>
   90 #include <map>
   91 
   92 #include "feature_recorder.h"
   93 #include "feature_recorder_set.h"
   94 
   95 /* Network includes */
   96 
   97 /****************************************************************
   98  *** pcap.h --- If we don't have it, fake it. ---
   99  ***/
  100 #ifdef HAVE_NETINET_IF_ETHER_H
  101 # include <netinet/if_ether.h>
  102 #endif
  103 #ifdef HAVE_NETINET_IN_H
  104 # include <netinet/in.h>
  105 #endif
  106 #ifdef HAVE_NET_ETHERNET_H
  107 # include <net/ethernet.h>              // for freebsd
  108 #endif
  109 
  110 
  111 #if defined(HAVE_LIBPCAP)
  112 #  ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
  113 #    pragma GCC diagnostic ignored "-Wredundant-decls"
  114 #  endif
  115 #  if defined(HAVE_PCAP_PCAP_H)
  116 #    include <pcap/pcap.h>
  117 #    define GOT_PCAP
  118 #  endif
  119 #  if defined(HAVE_PCAP_H) && !defined(GOT_PCAP)
  120 #    include <pcap.h>
  121 #    define GOT_PCAP
  122 #  endif
  123 #  if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP)
  124 #    include <wpcap/pcap.h>
  125 #    define GOT_PCAP
  126 #  endif
  127 #  ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
  128 #    pragma GCC diagnostic warning "-Wredundant-decls"
  129 #  endif
  130 #else
  131 #  include "pcap_fake.h"
  132 #endif
  133 
  134 /**
  135  * \class scanner_params
  136  * The scanner params class is the primary way that the bulk_extractor framework
  137  * communicates with the scanners.
  138  * @param sbuf - the buffer to be scanned
  139  * @param feature_names - if fs==0, add to feature_names the feature file types that this
  140  *                        scanner records.. The names can have a /c appended to indicate
  141  *                        that the feature files should have context enabled. Do not scan.
  142  * @param fs   - where the features should be saved. Must be provided if feature_names==0.
  143  **/
  144 
  145 /*****************************************************************
  146  *** bulk_extractor has a private implementation of IPv4 and IPv6,
  147  *** UDP and TCP.
  148  ***
  149  *** We did this because we found slightly different versions on
  150  *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin.
  151  *** TCP/IP isn't changing anytime soon, and when it changes (as it
  152  *** did with IPv6), these different systems all implemented it slightly
  153  *** differently, and that caused a lot of problems for us.
  154  *** So the BE13 API has a single implementation and it's good enough
  155  *** for our uses.
  156  ***/
  157 
  158 namespace be13 {
  159 
  160 #ifndef ETH_ALEN
  161 #  define ETH_ALEN 6                    // ethernet address len
  162 #endif
  163 
  164 #ifndef IPPROTO_TCP
  165 #  define IPPROTO_TCP     6               /* tcp */
  166 #endif
  167 
  168     struct ether_addr {                 // one Ethernet hardware address: ETH_ALEN raw octets
  169         uint8_t ether_addr_octet[ETH_ALEN];
  170     } __attribute__ ((__packed__));     // packed, so the struct is exactly ETH_ALEN bytes
  171 
  172     /* 10Mb/s ethernet header: two ETH_ALEN-byte addresses followed by a 16-bit type.
          * Packed so that it can be overlaid directly on captured frame bytes.
          */
  173     struct ether_header {
  174         uint8_t  ether_dhost[ETH_ALEN]; /* destination eth addr */
  175         uint8_t  ether_shost[ETH_ALEN]; /* source ether addr    */
  176         uint16_t ether_type;            /* packet type ID field; packet_info::ether_type() converts it with ntohs() */
  177     } __attribute__ ((__packed__));
  178 
  179     /* The mess below is because these items are typedefs and
  180      * structs on some systems and #defines on other systems
  181      * So in the interest of portability we need to define *new*
  182      * structures that are only used here
  183      */
  184 
  185     typedef uint32_t ip4_addr_t;         // historical; a raw 32-bit IPv4 address
  186 
  187     // on windows we use the definition that's in winsock
  188     struct ip4_addr {                    // wrapper used for the ip_src/ip_dst fields of struct ip4
  189         ip4_addr_t addr;                 // presumably in the byte order found in the packet -- TODO confirm
  190     };
  191 
  192     /*
  193      * Structure of an internet header, naked of options.
         *
         * The struct is packed so it can be overlaid on raw packet bytes.
         * ip_hl and ip_v share the first byte; their declaration order is
         * swapped depending on __BYTE_ORDER.
  194      */
  195     struct ip4 {
  196 #if __BYTE_ORDER == __LITTLE_ENDIAN
  197         uint8_t ip_hl:4;                /* header length */
  198         uint8_t ip_v:4;                 /* version */
  199 #endif
  200 #if __BYTE_ORDER == __BIG_ENDIAN
  201         uint8_t ip_v:4;                 /* version */
  202         uint8_t ip_hl:4;                /* header length */
  203 #endif
  204         uint8_t  ip_tos;                /* type of service */
  205         uint16_t ip_len;                /* total length */
  206         uint16_t ip_id;                 /* identification */
  207         uint16_t ip_off;                /* fragment offset field; flag and offset masks follow */
  208 #define IP_RF 0x8000                    /* reserved fragment flag */
  209 #define IP_DF 0x4000                    /* don't fragment flag */
  210 #define IP_MF 0x2000                    /* more fragments flag */
  211 #define IP_OFFMASK 0x1fff               /* mask for fragmenting bits */
  212         uint8_t ip_ttl;                 /* time to live */
  213         uint8_t ip_p;                   /* protocol (byte 9 of the packed header) */
  214         uint16_t ip_sum;                        /* checksum */
  215         struct ip4_addr ip_src, ip_dst; /* source and dest address (bytes 12 and 16) */
  216     } __attribute__ ((__packed__));
  217 
  218     struct ip4_dgram {                  // non-owning view of an IPv4 datagram: header pointer plus payload
  219         const struct ip4 *header;       // the IPv4 header
  220         const uint8_t *payload;         // presumably the first byte after the header -- TODO confirm with users
  221         uint16_t payload_len;           // number of bytes available at payload
  222     };
  223 
  224     /*
  225      * IPv6 header structure
  226      */
  227     struct ip6_addr {           // our own private ipv6 definition
  228         union {                 // all three members alias the same 16 bytes
  229             uint8_t   addr8[16];        // three ways to get the data
  230             uint16_t  addr16[8];
  231             uint32_t  addr32[4];
  232         } addr;                    /* 128-bit IP6 address */
  233     };
  234     struct ip6_hdr {                    // fixed IPv6 header, packed for overlay on raw packet bytes
  235         union {                         // ip6_un2_vfc aliases the first byte of ip6_un1
  236             struct ip6_hdrctl {
  237                 uint32_t ip6_un1_flow;  /* 20 bits of flow-ID */
  238                 uint16_t ip6_un1_plen;  /* payload length (byte offset 4) */
  239                 uint8_t  ip6_un1_nxt;   /* next header (byte offset 6) */
  240                 uint8_t  ip6_un1_hlim;  /* hop limit */
  241             } ip6_un1;
  242             uint8_t ip6_un2_vfc;        /* 4 bits version, top 4 bits class */
  243         } ip6_ctlun;
  244         struct ip6_addr ip6_src;        /* source address (byte offset 8) */
  245         struct ip6_addr ip6_dst;        /* destination address (byte offset 24) */
  246     } __attribute__((__packed__));
  247 
  248     struct ip6_dgram {                  // non-owning view of an IPv6 datagram: header pointer plus payload
  249         const struct ip6_hdr *header;   // the fixed IPv6 header
  250         const uint8_t *payload;         // presumably the first byte after the header -- TODO confirm with users
  251         uint16_t payload_len;           // number of bytes available at payload
  252     };
  253 
  254     /*
  255      * TCP header.
  256      * Per RFC 793, September, 1981.
         *
         * th_x2 and th_off share one byte; their declaration order is swapped
         * depending on __BYTE_ORDER.
         * NOTE(review): unlike the other header structs in this file, this one
         * is not marked __packed__ -- confirm that is intentional.
  257      */
  258     typedef     uint32_t tcp_seq;       // type of the sequence and acknowledgement numbers
  259     struct tcphdr {
  260         uint16_t th_sport;              /* source port */
  261         uint16_t th_dport;              /* destination port */
  262         tcp_seq th_seq;         /* sequence number */
  263         tcp_seq th_ack;         /* acknowledgement number */
  264 #  if __BYTE_ORDER == __LITTLE_ENDIAN
  265         uint8_t th_x2:4;                /* (unused) */
  266         uint8_t th_off:4;               /* data offset */
  267 #  endif
  268 #  if __BYTE_ORDER == __BIG_ENDIAN
  269         uint8_t th_off:4;               /* data offset */
  270         uint8_t th_x2:4;                /* (unused) */
  271 #  endif
  272         uint8_t th_flags;               /* bitwise OR of the TH_* values below */
  273 #  define TH_FIN        0x01
  274 #  define TH_SYN        0x02
  275 #  define TH_RST        0x04
  276 #  define TH_PUSH       0x08
  277 #  define TH_ACK        0x10
  278 #  define TH_URG        0x20
  279     uint16_t th_win;            /* window */
  280     uint16_t th_sum;            /* checksum */
  281     uint16_t th_urp;            /* urgent pointer */
  282 };
  283 /*
  284  * The packet_info structure records packets after they are read from the pcap library.
  285  * It preserves the original pcap information and information decoded from the MAC and
  286  * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11
  287  * interfaces. However it does not preserve the full radiotap information.
  288  *
  289  * packet_info is created to make it easier to write network forensic software. It encapsulates
  290  * much of the common knowledge needed to operate on packet-based IP networks.
  291  *
  292  * @param ts   - the actual packet time to use (adjusted)
  293  * @param pcap_data - Original data offset point from pcap
  294  * @param data - the actual packet data, minus the MAC layer
  295  * @param datalen - How much data is available at the data pointer
  296  *
  297  */
  298 class packet_info {                     // non-owning view of one captured packet; stores only pointers/references
  299 public:
  300     // IPv4 header offsets (in bytes, applied to ip_data)
  301     static const size_t ip4_proto_off = 9;
  302     static const size_t ip4_src_off = 12;
  303     static const size_t ip4_dst_off = 16;
  304     // IPv6 header offsets (in bytes, applied to ip_data)
  305     static const size_t ip6_nxt_hdr_off = 6;
  306     static const size_t ip6_plen_off = 4;
  307     static const size_t ip6_src_off = 8;
  308     static const size_t ip6_dst_off = 24;
  309     // TCP header offsets (in bytes, relative to the start of the TCP header)
  310     static const size_t tcp_sport_off = 0;
  311     static const size_t tcp_dport_off = 2;
  312 
          // Exception type used by the get_*() accessors when the available data is too short.
  313     class frame_too_short : public std::logic_error {
  314     public:
  315         frame_too_short() :
  316             std::logic_error("frame too short to contain requisite network structures") {}
  317     };
  318 
  319     enum vlan_t {NO_VLAN=-1};           // value returned by vlan() when there is no VLAN tag
  320     /** create a packet, usually an IP packet.
           * @param dlt - pcap data link type
           * @param h - pcap packet header
  321      * @param d - start of MAC packet
           * @param ts_ - packet time; stored by reference, so it must outlive this object
  322      * @param d2 - start of IP data
           * @param dl2 - number of bytes available at d2
  323      */
  324     packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d,
  325                 const struct timeval &ts_,const uint8_t *d2,size_t dl2):
  326         pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){}
          /* Shorter form: the time is taken from h->ts, and ip_data/ip_datalen are set to d and h->caplen. */
  327     packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d):
  328         pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){}
  329 
  330     const int    pcap_dlt;              // data link type; needed by libpcap, not provided
  331     const struct pcap_pkthdr *pcap_hdr; // provided by libpcap
  332     const u_char *pcap_data;            // provided by libpcap; where the MAC layer begins
  333     const struct timeval &ts;           // when packet received; possibly modified before packet_info created (reference, not a copy)
  334     const uint8_t *const ip_data;       // pointer to where ip data begins
  335     const size_t ip_datalen;            // length of ip data
  336 
  337     static u_short nshort(const u_char *buf,size_t pos);   // return a network byte order short at offset pos
  338     int     ip_version() const;         // returns 4, 6 or 0
  339     u_short ether_type() const;         // returns 0 if not IEEE802, otherwise returns ether_type
  340     int     vlan() const;               // returns NO_VLAN if not IEEE802 or not VLAN, otherwise VID
  341     const uint8_t *get_ether_dhost() const;   // returns a pointer to ether dhost if ether packet
  342     const uint8_t *get_ether_shost() const;   // returns a pointer to ether shost if ether packet
  343 
  344     // packet typing
  345     bool    is_ip4() const;
  346     bool    is_ip6() const;
  347     bool    is_ip4_tcp() const;
  348     bool    is_ip6_tcp() const;
  349     // packet extraction
  350     // IPv4 - return pointers to fields or throws frame_too_short exception
  351     const struct in_addr *get_ip4_src() const;
  352     const struct in_addr *get_ip4_dst() const;
  353     uint8_t get_ip4_proto() const;
  354     // IPv6
  355     uint8_t  get_ip6_nxt_hdr() const;
  356     uint16_t get_ip6_plen() const;
  357     const struct ip6_addr *get_ip6_src() const;
  358     const struct ip6_addr *get_ip6_dst() const;
  359     // TCP (ports are returned in host byte order)
  360     uint16_t get_ip4_tcp_sport() const;
  361     uint16_t get_ip4_tcp_dport() const;
  362     uint16_t get_ip6_tcp_sport() const;
  363     uint16_t get_ip6_tcp_dport() const;
  364 };
  365 
  366 #ifdef DLT_IEEE802
  367     inline u_short packet_info::ether_type() const
  368     {
  369         if(pcap_dlt==DLT_IEEE802 || pcap_dlt==DLT_EN10MB){
  370             const struct ether_header *eth_header = (struct ether_header *) pcap_data;
  371             return ntohs(eth_header->ether_type);
  372         }
  373         return 0;
  374     }
  375 #endif
  376 
  377 #ifndef ETHERTYPE_PUP           /* fallback EtherType values: each is defined only if not already defined */
  378 #define ETHERTYPE_PUP           0x0200          /* Xerox PUP */
  379 #endif
  380 
  381 #ifndef ETHERTYPE_SPRITE
  382 #define ETHERTYPE_SPRITE        0x0500          /* Sprite */
  383 #endif
  384 
  385 #ifndef ETHERTYPE_IP
  386 #define ETHERTYPE_IP            0x0800          /* IP */
  387 #endif
  388 
  389 #ifndef ETHERTYPE_ARP
  390 #define ETHERTYPE_ARP           0x0806          /* Address resolution */
  391 #endif
  392 
  393 #ifndef ETHERTYPE_REVARP
  394 #define ETHERTYPE_REVARP        0x8035          /* Reverse ARP */
  395 #endif
  396 
  397 #ifndef ETHERTYPE_AT
  398 #define ETHERTYPE_AT            0x809B          /* AppleTalk protocol */
  399 #endif
  400 
  401 #ifndef ETHERTYPE_AARP
  402 #define ETHERTYPE_AARP          0x80F3          /* AppleTalk ARP */
  403 #endif
  404 
  405 #ifndef ETHERTYPE_VLAN
  406 #define ETHERTYPE_VLAN          0x8100          /* IEEE 802.1Q VLAN tagging; compared against ether_type() in vlan() */
  407 #endif
  408 
  409 #ifndef ETHERTYPE_IPX
  410 #define ETHERTYPE_IPX           0x8137          /* IPX */
  411 #endif
  412 
  413 #ifndef ETHERTYPE_IPV6
  414 #define ETHERTYPE_IPV6          0x86dd          /* IP protocol version 6 */
  415 #endif
  416 
  417 #ifndef ETHERTYPE_LOOPBACK
  418 #define ETHERTYPE_LOOPBACK      0x9000          /* used to test interfaces */
  419 #endif
  420 
  421 
  422     inline u_short packet_info::nshort(const u_char *buf,size_t pos)
  423     {
  424         return (buf[pos]<<8) | (buf[pos+1]);
  425     }
  426 
  427     inline int packet_info::vlan() const
  428     {
  429         if(ether_type()==ETHERTYPE_VLAN){
  430             return nshort(pcap_data,sizeof(struct ether_header));
  431         }
  432         return -1;
  433     }
  434 
  435     inline int packet_info::ip_version() const
  436     {
  437         /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */
  438         if (ip_datalen >= sizeof(struct ip4)) {
  439             const struct ip4 *ip_header = (struct ip4 *) ip_data;
  440             switch(ip_header->ip_v){
  441             case 4: return 4;
  442             case 6: return 6;
  443             }
  444         }
  445         return 0;
  446     }
  447 
  448     // packet typing
  449 
  450     inline bool packet_info::is_ip4() const
  451     {
  452         return ip_version() == 4;
  453     }
  454 
  455     inline bool packet_info::is_ip6() const
  456     {
  457         return ip_version() == 6;
  458     }
  459 
  460     inline bool packet_info::is_ip4_tcp() const
  461     {
  462         if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) {
  463             return false;
  464         }
  465         return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP;
  466         return false;
  467     }
  468 
  469     inline bool packet_info::is_ip6_tcp() const
  470     {
  471         if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
  472             return false;
  473         }
  474         return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP;
  475     }
  476 
  477     // packet extraction
  478     // precondition: the appropriate packet type function must return true before using these functions.
  479     //     example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport()
  480 
  481     // Get ether addresses; should this handle vlan and such?
  482     inline const uint8_t *packet_info::get_ether_dhost() const
  483     {
  484         if(pcap_hdr->caplen < sizeof(struct ether_addr)){
  485             throw new frame_too_short();
  486         }
  487         return ((const struct ether_header *)pcap_data)->ether_dhost;
  488     }
  489 
  490     inline const uint8_t *packet_info::get_ether_shost() const
  491     {
  492         if(pcap_hdr->caplen < sizeof(struct ether_addr)){
  493             throw new frame_too_short();
  494         }
  495         return ((const struct ether_header *)pcap_data)->ether_shost;
  496     }
  497 
  498     // IPv4
  499 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
  500 #    pragma GCC diagnostic ignored "-Wcast-align"
  501 #  endif
  502     inline const struct in_addr *packet_info::get_ip4_src() const
  503     {
  504         if(ip_datalen < sizeof(struct ip4)) {
  505             throw new frame_too_short();
  506         }
  507         return (const struct in_addr *) ip_data + ip4_src_off;
  508     }
  509     inline const struct in_addr *packet_info::get_ip4_dst() const
  510     {
  511         if(ip_datalen < sizeof(struct ip4)) {
  512             throw new frame_too_short();
  513         }
  514         return (const struct in_addr *) ip_data + ip4_dst_off;
  515     }
  516 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
  517 #    pragma GCC diagnostic warning "-Wcast-align"
  518 #  endif
  519     inline uint8_t packet_info::get_ip4_proto() const
  520     {
  521         if(ip_datalen < sizeof(struct ip4)) {
  522             throw new frame_too_short();
  523         }
  524         return *((uint8_t *) (ip_data + ip4_proto_off));
  525     }
  526     // IPv6
  527     inline uint8_t packet_info::get_ip6_nxt_hdr() const
  528     {
  529         if(ip_datalen < sizeof(struct ip6_hdr)) {
  530             throw new frame_too_short();
  531         }
  532         return *((uint8_t *) (ip_data + ip6_nxt_hdr_off));
  533     }
  534     inline uint16_t packet_info::get_ip6_plen() const
  535     {
  536         if(ip_datalen < sizeof(struct ip6_hdr)) {
  537             throw new frame_too_short();
  538         }
  539         //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off)));
  540         return nshort(ip_data,ip6_plen_off);
  541     }
  542 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
  543 #    pragma GCC diagnostic ignored "-Wcast-align"
  544 #  endif
  545     inline const struct ip6_addr *packet_info::get_ip6_src() const
  546     {
  547         if(ip_datalen < sizeof(struct ip6_hdr)) {
  548             throw new frame_too_short();
  549         }
  550         return (const struct ip6_addr *) ip_data + ip6_src_off;
  551     }
  552     inline const struct ip6_addr *packet_info::get_ip6_dst() const
  553     {
  554         if(ip_datalen < sizeof(struct ip6_hdr)) {
  555             throw new frame_too_short();
  556         }
  557         return (const struct ip6_addr *) ip_data + ip6_dst_off;
  558     }
  559 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
  560 #    pragma GCC diagnostic warning "-Wcast-align"
  561 #  endif
  562 
  563     // TCP
  564     inline uint16_t packet_info::get_ip4_tcp_sport() const
  565     {
  566         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
  567             throw new frame_too_short();
  568         }
  569         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off)));
  570         return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off);
  571     }
  572     inline uint16_t packet_info::get_ip4_tcp_dport() const
  573     {
  574         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
  575             throw new frame_too_short();
  576         }
  577         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off)));
  578         return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); //
  579 
  580     }
  581     inline uint16_t packet_info::get_ip6_tcp_sport() const
  582     {
  583         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
  584             throw new frame_too_short();
  585         }
  586         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off)));
  587         return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); //
  588     }
  589     inline uint16_t packet_info::get_ip6_tcp_dport() const
  590     {
  591         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
  592             throw new frame_too_short();
  593         }
  594         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off)));
  595         return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); //
  596     }
  597 };
  598 
  599 
  600 typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb);  // signature of a scanner entry point
  601 typedef void process_t(const class scanner_params &sp);                                            // function taking only the scanner_params
  602 typedef void packet_callback_t(void *user,const be13::packet_info &pi);                            // network packet callback; see scanner_info::packet_cb / packet_user
  603 
  604 /** scanner_info gets filled in by the scanner to tell the caller about the scanner.
  605  *
  606  */
  607 class scanner_info {
  608 private:
  609     static std::stringstream helpstream; // where scanner info help messages are saved.
  610 
  611     // default copy construction and assignment are meaningless
  612     // and not implemented
  613     scanner_info(const scanner_info &i);
  614     scanner_info &operator=(const scanner_info &i);
  615  public:
  616     static std::string helpstr(){return helpstream.str();}
  617     typedef std::map<std::string,std::string>  config_t; // configuration for scanner passed in
  618 
  619     /* scanner flags */
  620     static const int SCANNER_DISABLED       = 0x001; // v1: enabled by default
  621     static const int SCANNER_NO_USAGE       = 0x002; // v1: do not show scanner in usage
  622     static const int SCANNER_NO_ALL         = 0x004; // v2: do not enable with -eall
  623     static const int SCANNER_FIND_SCANNER   = 0x008; // v2: this scanner uses the find_list
  624     static const int SCANNER_RECURSE        = 0x010; // v3: this scanner will recurse
  625     static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size
  626     static const int SCANNER_WANTS_NGRAMS   = 0x040; // v3: Scanner gets buffers that are constant n-grams
  627     static const int SCANNER_FAST_FIND      = 0x080; // v3: This scanner is a very fast FIND scanner
  628     static const int SCANNER_DEPTH_0        = 0x100; // v3: scanner only runs at depth 0 by default
  629     static const int CURRENT_SI_VERSION     = 4;
  630 
  631     static const std::string flag_to_string(const int flag){
  632         std::string ret;
  633         if(flag==0) ret += "NONE ";
  634         if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED ";
  635         if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE ";
  636         if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL ";
  637         if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER ";
  638         if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE ";
  639         if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND ";
  640         if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS ";
  641         return ret;
  642     }
  643 
  644     /* Global config is passed to each scanner as a pointer when it is loaded.
  645      * Scanner histograms are added to 'histograms' by machinery.
  646      */
  647     struct scanner_config {
  648         scanner_config():namevals(),debug(){};
  649         virtual ~scanner_config(){}
  650         config_t  namevals;             // v3: (input) name=val map
  651         int       debug;                // v3: (input) current debug level
  652     };
  653 
  654     // never change the order or delete old fields, or else you will
  655     // break backwards compatability
  656     scanner_info():si_version(CURRENT_SI_VERSION),
  657                    name(),author(),description(),url(),scanner_version(),flags(0),feature_names(),
  658                    histogram_defs(),packet_user(),packet_cb(),config(){}
  659     /* PASSED FROM SCANNER to API: */
  660     int         si_version;             // version number for this structure
  661     std::string      name;                   // v1: (output) scanner name
  662     std::string      author;                 // v1: (output) who wrote me?
  663     std::string      description;            // v1: (output) what do I do?
  664     std::string      url;                    // v1: (output) where I come from
  665     std::string      scanner_version;        // v1: (output) version for the scanner
  666     uint64_t    flags;                  // v1: (output) flags
  667     std::set<std::string> feature_names;          // v1: (output) features I need
  668     histogram_defs_t histogram_defs;        // v1: (output) histogram definition info
  669     void        *packet_user;           // v2: (output) data for network callback
  670     packet_callback_t *packet_cb;       // v2: (output) callback for processing network packets, or NULL
  671 
  672     /* PASSED FROM API TO SCANNER; access with functions below */
  673     const scanner_config *config;       // v3: (intput to scanner) config
  674 
  675     // These methods are implemented in the plugin system for the scanner to get config information.
  676     // The get_config methods should be called on the si object during PHASE_STARTUP
  677     virtual void get_config(const scanner_info::config_t &c,
  678                             const std::string &name,std::string *val,const std::string &help);
  679     virtual void get_config(const std::string &name,std::string *val,const std::string &help);
  680     virtual void get_config(const std::string &name,uint64_t *val,const std::string &help);
  681     virtual void get_config(const std::string &name,int32_t *val,const std::string &help);
  682     virtual void get_config(const std::string &name,uint32_t *val,const std::string &help);
  683     virtual void get_config(const std::string &name,uint16_t *val,const std::string &help);
  684     virtual void get_config(const std::string &name,uint8_t *val,const std::string &help);
  685 #ifdef __APPLE__
  686     virtual void get_config(const std::string &name,size_t *val,const std::string &help);
  687 #define HAVE_GET_CONFIG_SIZE_T
  688 #endif
  689     virtual void get_config(const std::string &name,bool *val,const std::string &help);
  690     virtual ~scanner_info(){};
  691 };
  692 #include <map>
  693 /**
  694  * The scanner_params class is a way for sending the scanner parameters
  695  * for this particular sbuf to be scanned.
  696  */
  697 
  698 class scanner_params {
  699  public:
  700     enum print_mode_t {MODE_NONE=0,MODE_HEX,MODE_RAW,MODE_HTTP};
  701     static const int CURRENT_SP_VERSION=3;
  702 
  703     typedef std::map<std::string,std::string> PrintOptions;
  704     static print_mode_t getPrintMode(const PrintOptions &po){
  705         PrintOptions::const_iterator p = po.find("print_mode_t");
  706         if(p != po.end()){
  707             if(p->second=="MODE_NONE") return MODE_NONE;
  708             if(p->second=="MODE_HEX") return MODE_HEX;
  709             if(p->second=="MODE_RAW") return MODE_RAW;
  710             if(p->second=="MODE_HTTP") return MODE_HTTP;
  711         }
  712         return MODE_NONE;
  713     }
  714     static void setPrintMode(PrintOptions &po,int mode){
  715         switch(mode){
  716         default:
  717         case MODE_NONE:po["print_mode_t"]="MODE_NONE";return;
  718         case MODE_HEX:po["print_mode_t"]="MODE_HEX";return;
  719         case MODE_RAW:po["print_mode_t"]="MODE_RAW";return;
  720         case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return;
  721         }
  722     }
  723 
  724     // phase_t specifies when the scanner is being called
  725     typedef enum {
  726         PHASE_NONE     = -1,
  727         PHASE_STARTUP  = 0,            // called in main thread when scanner loads; called on EVERY scanner (called for help)
  728         PHASE_INIT     = 3,            // called in main thread for every ENABLED scanner after all scanners loaded
  729         PHASE_THREAD_BEFORE_SCAN = 4,  // called in worker thread for every ENABLED scanner before first scan
  730         PHASE_SCAN     = 1,            // called in worker thread for every ENABLED scanner to scan an sbuf
  731         PHASE_SHUTDOWN = 2,            // called in main thread for every ENABLED scanner when scanner is shutdown
  732     } phase_t ;
  733     static PrintOptions no_options;    // in common.cpp
  734 
  735     /********************
  736      *** CONSTRUCTORS ***
  737      ********************/
  738 
  739     /* A scanner params with all of the instance variables, typically for scanning  */
  740     scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,
  741                    PrintOptions &print_options_):
  742         sp_version(CURRENT_SP_VERSION),
  743         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){
  744     }
  745 
  746     /* A scanner params with no print options */
  747     scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_):
  748         sp_version(CURRENT_SP_VERSION),
  749         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){
  750     }
  751 
  752     /* A scanner params with no print options but an xmlstream */
  753     scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd):
  754         sp_version(CURRENT_SP_VERSION),
  755         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){
  756     }
  757 
  758     /** Construct a scanner_params for recursion from an existing sp and a new sbuf.
  759      * Defaults to phase1
  760      */
  761     scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new):
  762         sp_version(CURRENT_SP_VERSION),phase(sp_existing.phase),
  763         sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1),
  764         print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){
  765         assert(sp_existing.sp_version==CURRENT_SP_VERSION);
  766     };
  767 
  768     /**
  769      * A scanner params with an empty info
  770      */
  771 
  772     /**************************
  773      *** INSTANCE VARIABLES ***
  774      **************************/
  775 
  776     const int                   sp_version;                /* version number of this structure */
  777     const phase_t               phase;                 /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */
  778     const sbuf_t                &sbuf;                 /* v1: what to scan / only valid in SCAN_PHASE */
  779     class feature_recorder_set  &fs;     /* v1: where to put the results / only valid in SCAN_PHASE */
  780     const uint32_t              depth;            /* v1: how far down are we? / only valid in SCAN_PHASE */
  781 
  782     PrintOptions                &print_options;    /* v1: how to print / NOT USED IN SCANNERS */
  783     scanner_info                *info;             /* v2: set/get parameters on startup, hasher */
  784     std::stringstream           *sxml;         /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */
  785 };
  786 
  787 
  788 inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){
  789     os << "scanner_params(" << sp.sbuf << ")";
  790     return os;
  791 };
  792 
  793 class recursion_control_block {
  794  public:
  795 /**
  796  * @param callback_ - the function to call back
  797  * @param partName_ - the part of the forensic path processed by this scanner.
  798  */
  799     recursion_control_block(process_t *callback_,std::string partName_):
  800         callback(callback_),partName(partName_){}
  801     process_t *callback;
  802     std::string partName;            /* eg "ZIP", "GZIP" */
  803 };
  804 
  805 /* plugin.cpp. This will become a class...  */
  806 class scanner_def {
  807 public:;
  808     static uint32_t max_depth;          // maximum depth to scan for the scanners
  809     static uint32_t max_ngram;          // maximum ngram size to change
  810     scanner_def():scanner(0),enabled(false),info(),pathPrefix(){};
  811     scanner_t  *scanner;                // pointer to the primary entry point
  812     bool        enabled;                // is enabled?
  813     scanner_info info;                  // info block sent to and returned by scanner
  814     std::string      pathPrefix;             /* path prefix for recursive scanners */
  815 };
  816 
  817 namespace be13 {
  818     /* plugin.cpp */
  819 
  820     struct plugin {
  821         typedef std::vector<scanner_def *> scanner_vector;
  822         static scanner_vector current_scanners;                         // current scanners
  823         static bool dup_data_alerts;  // notify when duplicate data is not processed
  824         static uint64_t dup_data_encountered; // amount of dup data encountered
  825 
  826         static void set_scanner_debug(int debug);
  827 
  828         static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner
  829         static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc);    // load a scanner from a file
  830         static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins
  831         static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory
  832         static void load_scanner_directories(const std::vector<std::string> &dirnames,const scanner_info::scanner_config &sc);
  833         static void load_scanner_packet_handlers();
  834 
  835         // send every enabled scanner the phase message
  836         static void message_enabled_scanners(scanner_params::phase_t phase,feature_recorder_set &fs);
  837 
  838         // returns the named scanner, or 0 if no scanner of that name
  839         static scanner_t *find_scanner(const std::string &name);
  840         static void get_enabled_scanners(std::vector<std::string> &svector); // put the enabled scanners into the vector
  841         static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs);
  842         static bool find_scanner_enabled(); // return true if a find scanner is enabled
  843 
  844         // print info about the scanners:
  845         static void scanners_disable_all();                    // saves a command to disable all
  846         static void scanners_enable_all();                    // enable all of them
  847         static void set_scanner_enabled(const std::string &name,bool enable);
  848         static void set_scanner_enabled_all(bool enable);
  849         static void scanners_enable(const std::string &name); // saves a command to enable this scanner
  850         static void scanners_disable(const std::string &name); // saves a command to disable this scanner
  851         static void scanners_process_enable_disable_commands();               // process the enable/disable and config commands
  852         static void scanners_init(feature_recorder_set &fs); // init the scanners
  853 
  854         static void info_scanners(bool detailed_info,
  855                                   bool detailed_settings,
  856                                   scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt);
  857 
  858 
  859         /* Run the phases on the scanners */
  860         static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown
  861         static uint32_t get_max_depth_seen();
  862         static void process_sbuf(const class scanner_params &sp);                              /* process for feature extraction */
  863         static void process_packet(const be13::packet_info &pi);
  864 
  865         /* recorders */
  866         static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names);
  867 
  868     };
  869 };
  870 
  871 inline std::string itos(int i){ std::stringstream ss; ss << i;return ss.str();}
  872 inline std::string dtos(double d){ std::stringstream ss; ss << d;return ss.str();}
  873 inline std::string utos(unsigned int i){ std::stringstream ss; ss << i;return ss.str();}
  874 inline std::string utos(uint64_t i){ std::stringstream ss; ss << i;return ss.str();}
  875 inline std::string utos(uint16_t i){ std::stringstream ss; ss << i;return ss.str();}
  876 inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up
  877     std::string utf8_line;
  878     try {
  879         utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line));
  880     } catch(utf8::invalid_utf16 const &){
  881         /* Exception thrown: bad UTF16 encoding */
  882         utf8_line = "";
  883     }
  884     return utf8_line;
  885 }
  886 
  887 inline std::wstring safe_utf8to16(std::string s){ // needs to be cleaned up
  888     std::wstring utf16_line;
  889     try {
  890         utf8::utf8to16(s.begin(),s.end(),back_inserter(utf16_line));
  891     } catch(utf8::invalid_utf8 const &){
  892         /* Exception thrown: bad UTF16 encoding */
  893         utf16_line = L"";
  894     }
  895     return utf16_line;
  896 }
  897 
  898 // truncate string at the matching char
  899 inline void truncate_at(std::string &line, char ch) {
  900     size_t pos = line.find(ch);
  901     if(pos != std::string::npos) line.resize(pos);
  902 }
  903 
  904 #ifndef HAVE_ISXDIGIT
  905 inline int isxdigit(int c)
  906 {
  907     return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F');
  908 }
  909 #endif
  910 
  911 /* Useful functions for scanners */
  912 #define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000
  913 #define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL
  914 /*
  915  * 11644473600 is the number of seconds between the Win32 epoch
  916  * and the Unix epoch.
  917  *
  918  * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992
  919  * gmtime_r() is Linux-specific. You'll find a copy in util.cpp for Windows.
  920  */
  921 
  922 inline std::string microsoftDateToISODate(const uint64_t &time)
  923 {
  924     time_t tmp = (time / ONE_HUNDRED_NANO_SEC_TO_SECONDS) - SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH;
  925 
  926     struct tm time_tm;
  927     gmtime_r(&tmp, &time_tm);
  928     char buf[256];
  929     strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time
  930     return std::string(buf);
  931 }
  932 
  933 /* Convert Unix timestamp to ISO format */
  934 inline std::string unixTimeToISODate(const uint64_t &t)
  935 {
  936     struct tm time_tm;
  937     time_t tmp=t;
  938     gmtime_r(&tmp, &time_tm);
  939     char buf[256];
  940     strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time
  941     return std::string(buf);
  942 }
  943 
  944 /* Many internal windows and Linux structures require a valid printable name in ASCII */
  945 inline bool validASCIIName(const std::string &name)
  946 {
  947     for(size_t i = 0; i< name.size(); i++){
  948         if(((u_char)name[i]) & 0x80) return false; // high bit should not be set
  949         if(((u_char)name[i]) < ' ') return false;  // should not be control character
  950         if(((u_char)name[i]) == 0x7f) return false; // DEL is not printable
  951     }
  952     return true;
  953 }
  954 
  955 #endif