"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/iptree.h" (19 Feb 2021, 21869 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "iptree.h" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.5.0_vs_1.6.1.

    1 /*
    2  * iptree.h:
    3  * 
    4  * Maintains a count of all IP addresses seen, with limits on the
    5  * maximum amount of memory.
    6  *
    7  * #include this file after config.h (or whatever you are calling it)
    8  */
    9 
   10 #ifndef IPTREE_H
   11 #define IPTREE_H
   12 
   13 #include <stdint.h>
   14 #include <algorithm>
   15 #include <assert.h>
   16 #include <iostream>
   17 #include <iomanip>
   18 
   19 #ifdef HAVE_ARPA_INET_H
   20 #include <arpa/inet.h>
   21 #endif
   22 
   23 #define IP4_ADDR_LEN 4
   24 #define IP6_ADDR_LEN 16
   25 
   26 /**
   27  * the iptree.
   28  *
   29  * pruning a node means cutting off its leaves (the node remains in the tree).
   30  */
   31 
   32 /* addrbytes is the number of bytes in the address */
   33 
   34 template <typename TYPE,size_t ADDRBYTES> class iptreet {
   35 private:;
   36     /**
   37      * the node class.
   38      * Each node tracks the sum that it currently has and its two children.
   39      * A node has pointers to the 0 and 1 children, as well as a sum for everything below.
   40      * A short address or prefix being tallied may result in BOTH a sum and one or more PTR values.
   41      * If a node is pruned, ptr0=ptr1=0 and tsum>0.  
   42      * If tsum>0 and ptr0=0 and ptr1=0, then the node cannot be extended.
   43      * Nodes need to know their parent so that nodes found through the cache can be made dirty,
   44      * which requires knowing their parents.
   45      *
   46      * Note: currently we get a slightly different answer when the pruning cache is enabled.
   47      * Not sure why. It's probably not worth fixing at the moment.
   48      */
   49     class node {
   50         /** best describes the best node to prune */
   51     public:
   52         class best {
   53         public:
   54             best &operator=(const best &that){
   55                 ptr  = that.ptr;
   56                 depth = that.depth;
   57                 return *this;
   58             }
   59             const node *ptr;
   60             int depth;
   61             best():ptr(0),depth(-1){}; // don't use this one
   62             best(const node *ptr_,int depth_): ptr(ptr_),depth(depth_){}
   63             best(const best &b):ptr(b.ptr),depth(b.depth){ }
   64             virtual ~best(){}
   65             friend std::ostream & operator<<(std::ostream &os,best const & foo) {
   66                 os << "node=" << foo.ptr << " depth=" << foo.depth << " ";
   67                 return os;
   68             }
   69         };
   70     private:
   71         /* Assignment and copy are not implemented */
   72         node &operator=(const iptreet::node &that);
   73         node(const node &n);
   74     public:
   75         class node *parent;
   76         class node *ptr0;               // 0 bit next
   77         class node *ptr1;               // 1 bit next
   78     private:
   79     public:;
   80         TYPE    tsum;                   // this node and pruned children.
   81 
   82         /* Caching system */
   83         mutable bool    dirty;                  // add() has been called and cached data is no longer valid
   84         mutable best    cached_best;
   85     public:
   86         node(node *p):parent(p),ptr0(0),ptr1(0),tsum(),dirty(false),cached_best(){ }
   87         int children() const {return (ptr0 ? 1 : 0) + (ptr1 ? 1 : 0);}
   88         ~node(){
   89             delete ptr0; ptr0 = 0;
   90             delete ptr1; ptr1 = 0; 
   91         };
   92         // a node is leaf if tsum>0 and both ptrs are 0.
   93         bool isLeaf() const {             
   94             if(tsum>0 && ptr0==0 && ptr1==0) return true;
   95             return false;
   96         }
   97         /**
   98          * prune():
   99          * Cut this node's children off the tree.
  100          * Returns the number removed, which should be larger than 0 (or we shouldn't have been called).
  101          */
  102         int prune(class iptreet &tree){                    // prune this node
  103             //std::cerr << "prune " << this << " ptr0= " << ptr0 << " ptr1=" << ptr1 << " parent= " << parent << "\n";
  104             /* If prune() on a node is called, then both ptr0 and ptr1 nodes, if present,
  105              * must not have children.
  106              * Now delete those that we counted out
  107              */
  108             int removed = 0;
  109             if(ptr0){
  110                 assert(ptr0->isLeaf());   // only prune leaf nodes
  111                 tsum += ptr0->tsum;
  112                 tree.cache_remove(ptr0); // remove it from the cache
  113                 tree.pruned++;
  114                 delete ptr0; ptr0=0;
  115                 tree.nodes--;
  116                 removed++;
  117             }
  118             if(ptr1){
  119                 assert(ptr1->isLeaf()); 
  120                 tsum += ptr1->tsum;
  121                 tree.cache_remove(ptr1);
  122                 tree.pruned++;
  123                 delete ptr1;
  124                 ptr1=0;
  125                 tree.nodes--;
  126                 removed++;
  127             }
  128             assert(removed>0);
  129             assert(isLeaf());           // I am now a leaf!
  130             set_dirty();                // should be able to just set parent
  131             //std::cerr << "   parent dirty=" << parent->dirty << "  this=" << this << " isLeaf()=" << isLeaf() << "\n";
  132             //if(parent){
  133             //parent->dirty=true; // parent is dirty, but no need to propigate it up
  134             //}
  135             return removed;
  136         }
  137 
  138         /**
  139          * Return the best node to prune (the node with the leaves to remove)
  140          * Possible outputs:
  141          * case 1 - no node (if this is a leaf node, it can't be pruned; should not have been called)
  142          * case 2 - this node (if all of the children are leaf)
  143          * case 3 - the best node of the one child (if there is only one child)
  144          * case 4 - the of the non-leaf child (if one child is leaf and one is not)
  145          * case 5 - the better node of each child's best node.
  146          */
  147             
  148         class best best_to_prune(int my_depth) const {
  149             if(dirty==false && cached_best.ptr){
  150                 //return cached_best;     // haven't changed, so return
  151             }
  152             dirty = false;              // we will be cleaning
  153             // case 1 - this is a leaf; it was an error to call best_to_prune
  154             assert(isLeaf()==0);          
  155             // case 2 - our only children are leaves; this is the best node
  156             if ((ptr0==0 || ptr0->isLeaf()) &&
  157                 (ptr1==0 || ptr1->isLeaf())){
  158                 return cached_best = best(this,my_depth); // case 2
  159             }
  160             // case 3 - one of our children is a node and not a leaf,
  161             //        - and the other is a child or not present.
  162             //        - The best to prune is the child's best
  163 
  164             if ((ptr0==0 || ptr0->isLeaf()) &&
  165                 (ptr1!=0 && !ptr1->isLeaf())){
  166                 return cached_best = ptr1->best_to_prune(my_depth+1); // case 3
  167             }
  168 
  169             if ((ptr1==0 || ptr1->isLeaf()) &&
  170                 (ptr0!=0 && !ptr0->isLeaf())){
  171                 return cached_best = ptr0->best_to_prune(my_depth+1); // case 3
  172             }
  173 
  174             // case 5 - the better node of each child's best node.
  175             best ptr0_best = ptr0->best_to_prune(my_depth+1);
  176             best ptr1_best = ptr1->best_to_prune(my_depth+1);
  177 
  178             // The better to prune of two children is the one with a lower sum,
  179             // or the one that is deeper if they have the same sum.
  180             TYPE ptr0_best_sum = ptr0_best.ptr->sum();
  181             TYPE ptr1_best_sum = ptr1_best.ptr->sum();
  182             if(ptr0_best_sum < ptr1_best_sum ||
  183                (ptr0_best_sum == ptr1_best_sum && ptr0_best.depth > ptr1_best.depth)){
  184                 return cached_best = ptr0_best;
  185                }
  186             return cached_best = ptr1_best;
  187         }
  188 
  189         /** The nodesum is the sum of just the node.
  190          * This exists purely because tsum is a private variable.
  191          */
  192         TYPE nodesum() const {
  193             return tsum;
  194         }
  195 
  196         /** The sum is the sum of this node and its children (if they exist) */
  197         TYPE sum() const {
  198             TYPE s = tsum;
  199             if(ptr0) s+=ptr0->sum();
  200             if(ptr1) s+=ptr1->sum();
  201             return s;
  202         }
  203         /** Increment this node by the given amount */
  204         void add(TYPE val) {
  205             tsum+=val;                  // increment
  206             set_dirty();
  207         }          
  208 
  209         void set_dirty() {              // make us dirty and our parent dirty
  210             if(dirty==false){
  211                 dirty = true;
  212                 if(parent && parent->dirty==false){
  213                     parent->set_dirty();    // recurses to the root or the first dirty node.
  214                 } 
  215             }
  216         }
  217 
  218     }; /* end of node class */
  219     class node *root;                  
  220     enum {root_depth=0,
  221           max_histogram_depth=128,
  222           ipv4_bits=32,
  223           ipv6_bits=128,
  224     };
  225     iptreet &operator=(const iptreet &that); // not implemented
  226 protected:
  227     size_t     nodes;                   // nodes in tree
  228     size_t     maxnodes;                // how many will we tolerate?
  229     uint64_t   ctr_added;                   // how many were added
  230     uint64_t   pruned;
  231 public:
  232 
  233 
  234     /****************************************************************
  235      *** static member service routines
  236      ****************************************************************/
  237 
  238     /* get the ith bit; 0 is the MSB */
  239     static bool bit(const uint8_t *addr,size_t i){
  240         return (addr[i / 8]) & (1<<((7-i)&7));
  241     }
  242     /* set the ith bit to 1 */
  243     static void setbit(uint8_t *addr,size_t addrlen, size_t i){
  244         if ( i/8 < addrlen) {
  245             addr[i / 8] |= (1<<((7-i)&7));
  246         }
  247     }
  248     
  249     virtual ~iptreet(){}                // required per compiler warnings
  250     /* copy is a deep copy */
  251     iptreet(const iptreet &n):root(n.root ? new node(*n.root) : 0),
  252                               nodes(n.nodes),maxnodes(n.maxnodes),ctr_added(),pruned(),cache(),cachenext(),cache_hits(),cache_misses(){};
  253 
  254     /* create an empty tree */
  255     iptreet(int maxnodes_):root(new node(0)),nodes(0),maxnodes(maxnodes_),
  256                            ctr_added(),pruned(),cache(),cachenext(),cache_hits(),cache_misses(){
  257         for(size_t i=0;i<cache_size;i++){
  258             cache.push_back(cache_element(0,0,0));
  259         }
  260     };
  261 
  262     /* size the tree; the number of nodes */
  263     size_t size() const {return nodes;};
  264 
  265     /* sum the tree; the total number of adds that have been performed */
  266     TYPE sum() const {return root->sum();};
  267 
  268     /* add a node; implementation below */
  269     void add(const uint8_t *addr,size_t addrlen,TYPE val); 
  270 
  271     /****************************************************************
  272      *** cache
  273      ****************************************************************/
  274     class cache_element {
  275     public:
  276         uint8_t addr[ADDRBYTES];
  277         node *ptr;                      // 0 means cache entry is not in use
  278         cache_element(const uint8_t addr_[ADDRBYTES],size_t addrlen,node *p):addr(),ptr(p){
  279             memcpy(addr,addr_,addrlen);
  280         }
  281     };
  282     enum {cache_size=4};
  283     typedef std::vector<cache_element> cache_t;
  284     cache_t cache;
  285     size_t cachenext;                   // which cache element to evict next
  286     uint64_t cache_hits;
  287     uint64_t cache_misses;
  288 
  289     void cache_remove(const node *p){
  290         for(size_t i=0;i<cache.size();i++){
  291             if(cache[i].ptr==p){
  292                 cache[i].ptr = 0;
  293                 return;
  294             }
  295         }
  296     }
  297 
  298     ssize_t cache_search(const uint8_t *addr,size_t addrlen){
  299         for(size_t i = 0; i<cache.size(); i++){
  300             if(cache[i].ptr && memcmp(cache[i].addr,addr,addrlen)==0){
  301                 cache_hits++;
  302                 return i;
  303             }
  304         }
  305         cache_misses++;
  306         return -1;
  307     }
  308 
  309     void cache_replace(const uint8_t *addr,size_t addrlen,node *ptr) {
  310         if(++cachenext>=cache.size()) cachenext = 0;
  311         memcpy(cache[cachenext].addr,addr,addrlen);
  312         cache[cachenext].ptr = ptr;
  313     }
  314 
  315 
  316     /****************************************************************
  317      *** pruning
  318      ****************************************************************/
  319 
  320     /* prune the tree, starting at the root. Find the node to prune and then prune it.
  321      * node that best_to_prune() returns a const pointer. But we want to modify it, so we
  322      * do a const_cast (which is completely fine).
  323      */
  324     int prune_best_node(){
  325         if(root->isLeaf()) return 0;        // leaf nodes can't be pruned
  326         class node::best b = root->best_to_prune(root_depth);
  327         node *tnode = const_cast<node *>(b.ptr);
  328         if(tnode){
  329             return tnode->prune(*this);
  330         }
  331         return 0;
  332     }
  333 
  334     /* Simple implementation to prune the table if over the limit.
  335      */
  336     void prune_if_needed(){
  337         while(nodes > maxnodes){
  338             if(prune_best_node()==0) return; // cannot prune
  339         }
  340     }
  341 
  342     /****************************************************************
  343      *** historam support
  344      ****************************************************************/
  345 
  346     class addr_elem {
  347     public:
  348         addr_elem(const uint8_t *addr_,uint8_t depth_,int64_t count_):
  349             addr(),depth(depth_),count(count_){
  350             memcpy((void *)addr,addr_,sizeof(addr));
  351         }
  352         addr_elem() : addr(), depth(0), count(0) {
  353             memset((void *) addr, 0x00, sizeof(addr));
  354         }
  355         addr_elem &operator=(const addr_elem &n){
  356             memcpy((void *)this->addr,n.addr,sizeof(this->addr));
  357             this->count = n.count;
  358             this->depth = n.depth;
  359             return *this;
  360         }
  361         virtual ~addr_elem(){}
  362         const uint8_t addr[ADDRBYTES];         // maximum size address; v4 addresses have addr[4..15]=0
  363         uint8_t depth;                         // in bits; /depth
  364         TYPE count;
  365         
  366         bool is4() const { return isipv4(addr,ADDRBYTES);};
  367         std::string str() const { return ipstr(addr,ADDRBYTES,depth); }
  368     };
  369 
  370     /** get a histogram of the tree, and starting at a particular node 
  371      * The histogram is reported for every node that has a sum.
  372      * This is leaf nodes and inleafediate nodes.
  373      * This means that there must be a way for converting TYPE(count) to a boolean.
  374      *
  375      * @param depth - tracks current depth (in bits) into address.
  376      * @param ptr   - the node currently being queried
  377      * @param histogram - where the histogram is written
  378      */
  379     typedef std::vector<addr_elem> histogram_t;
  380     void get_histogram(int depth,const uint8_t *addr,const class node *ptr,histogram_t  &histogram) const{
  381         if(ptr->nodesum()){
  382             histogram.push_back(addr_elem(addr,depth,ptr->nodesum()));
  383             //return;
  384         }
  385         if(depth>max_histogram_depth) return;               // can't go deeper than this now
  386         
  387         /* create address with 0 and 1 added */
  388         uint8_t addr0[ADDRBYTES];
  389         uint8_t addr1[ADDRBYTES];
  390         
  391         memset(addr0,0,sizeof(addr0)); memcpy(addr0,addr,(depth+7)/8);
  392         memset(addr1,0,sizeof(addr1)); memcpy(addr1,addr,(depth+7)/8);
  393         setbit(addr1,sizeof(addr1),depth);
  394         
  395         if(ptr->ptr0) get_histogram(depth+1,addr0,ptr->ptr0,histogram);
  396         if(ptr->ptr1) get_histogram(depth+1,addr1,ptr->ptr1,histogram);
  397     }
  398         
  399     void get_histogram(histogram_t &histogram) const { // adds the histogram to the passed in vector
  400         uint8_t addr[ADDRBYTES];
  401         memset(addr,0,sizeof(addr));
  402         get_histogram(0,addr,root,histogram);
  403     }
  404 
  405     /****************************************************************
  406      *** output routines
  407      ****************************************************************/
  408 
  409     // returns true if addr[4..15]==0
  410     static std::string itos(int n){
  411         char buf[64];
  412         snprintf(buf,sizeof(buf),"%d",n);
  413         return std::string(buf);
  414     }
  415     static bool isipv4(const uint8_t *addr,size_t addrlen) { 
  416         if(addrlen==4) return true;
  417         for(u_int i=4;i<addrlen;i++){
  418             if(addr[i]!=0) return false;
  419         }
  420         return true;
  421     }
  422     static std::string ipstr(const uint8_t *addr,size_t addrlen,size_t depth){
  423         if(isipv4(addr,addrlen)){
  424             return ipv4(addr) + (depth<ipv4_bits  ? (std::string("/") + itos(depth)) : "");
  425         } else {
  426             return ipv6(addr) + (depth<ipv6_bits ? (std::string("/") + itos(depth)) : "");
  427         }
  428     }
  429 
  430     /* static service routines for displaying ipv4 and ipv6 addresses  */
  431     static std::string ipv4(const uint8_t *a){
  432         char buf[1024];
  433         snprintf(buf,sizeof(buf),"%d.%d.%d.%d",a[0],a[1],a[2],a[3]);
  434         return std::string(buf);
  435     }
  436     static std::string ipv6(const uint8_t *a){ 
  437         char buf[128];
  438         return std::string(inet_ntop(AF_INET6,a,buf,sizeof(buf)));
  439     }
  440     /* dump the stats */
  441     std::ostream & dump_stats(std::ostream &os) const {
  442         os << "nodes: " << nodes << "  maxnodes: " << maxnodes << " ctr_added: " << ctr_added << " pruned: " << pruned << "\n";
  443         os << "cache_hits: " << cache_hits << "\n";
  444         os << "cache_misses: " << cache_misses << "\n";
  445         return os;
  446     }
  447     /* dump the tree; largely for debugging */
  448     std::ostream & dump(std::ostream &os) const {
  449         histogram_t histogram;
  450         get_histogram(histogram);
  451         dump_stats(os);
  452         os << "histogram size: " << histogram.size() << "\n";
  453         for(size_t i=0;i<histogram.size();i++){
  454             os << histogram.at(i).str() << "  count=" << histogram.at(i).count << "\n";
  455         }
  456         return os;
  457     }
  458 
  459 };
  460 
  461 
  462 /** Add 'val' to the node associated with a particular ip address.
  463  * @param addr - the address
  464  *
  465  * @param addrlen - the length of the address (allows mixing of IPv4 & IPv6 in the same gree
  466  *
  467  * @param val - what to add. Use "1" to tally the number of packets,
  468  * "bytes" to count the number of bytes associated with each IP
  469  * address.
  470  */ 
  471 template <typename TYPE,size_t ADDRBYTES>
  472 void iptreet<TYPE,ADDRBYTES>::add(const uint8_t *addr,size_t addrlen,TYPE val)
  473 {
  474     prune_if_needed();
  475     if(addrlen > ADDRBYTES) addrlen=ADDRBYTES;
  476 
  477     u_int addr_bits = addrlen * 8;  // in bits
  478 
  479     
  480     /* check the cache first */
  481     ssize_t i = cache_search(addr,addrlen);
  482     if(i>=0){
  483         cache[i].ptr->add(val);
  484         return;
  485     }
  486 
  487     /* descend the radix tree until we run out of bits, or we have a
  488        node with no pointers and a non-zero sum.
  489      */
  490 
  491     node *ptr = root;               // start at the root
  492     for(u_int depth=0;depth<=addr_bits;depth++){
  493         if(depth==addr_bits){       // reached end of address
  494             ptr->add(val);          // increment this node (and all of its descendants 
  495             cache_replace(addr,addrlen,ptr);
  496             return;
  497         }
  498         if((ptr->tsum > 0) && (ptr->ptr0==0) && (ptr->ptr1==0)){
  499             ptr->add(val);
  500             cache_replace(addr,addrlen,ptr);
  501             return;
  502         }
  503         /* Not a leaf node, so go down a level based on the next bit,
  504          * extending if necessary.
  505          */
  506         if(bit(addr,depth)==0){
  507             if(ptr->ptr0==0){
  508                 ptr->ptr0 = new node(ptr);
  509                 nodes++;
  510                 ctr_added++;
  511             }
  512             ptr = ptr->ptr0;
  513         } else {
  514             if(ptr->ptr1==0){
  515                 ptr->ptr1 = new node(ptr); 
  516                 nodes++;
  517                 ctr_added++;
  518             }
  519             ptr = ptr->ptr1;
  520         }
  521     }
  522     assert(0);                          // should never happen
  523 }
  524 
  525 
  526 /* a structure for a pair of IP addresses */
  527 class ip2tree:public iptreet<uint64_t,32> {
  528 public:
  529     /* de-interleave a pair of addresses */
  530     static void un_pair(uint8_t *addr1,uint8_t *addr2,size_t addr12len,size_t *depth1,size_t *depth2,const uint8_t *addr,size_t addrlen,size_t depth){
  531         for(size_t i=0;i<addrlen*8/2;i++){
  532             if(iptreet<uint64_t,32>::bit(addr,i*2))
  533                 iptreet<uint64_t,32>::setbit(addr1, addr12len, i);
  534             if(iptreet<uint64_t,32>::bit(addr,i*2+1))
  535                 iptreet<uint64_t,32>::setbit(addr2, addr12len, i);
  536         }
  537         *depth1 = (depth+1)/2;
  538         *depth2 = (depth)/2;
  539     }
  540 
  541     ip2tree(int maxnodes_):iptreet<uint64_t,32>(maxnodes_){}
  542     virtual ~ip2tree(){};
  543     /* str requires more work */
  544     static std::string ip2str(const uint8_t *addr,size_t addrlen,size_t depth){
  545         uint8_t addr1[16];memset(addr1,0,sizeof(addr1));
  546         uint8_t addr2[16];memset(addr2,0,sizeof(addr2));
  547         size_t depth1=0,depth2=0;
  548         ip2tree::un_pair(addr1,addr2,sizeof(addr1),&depth1,&depth2,addr,addrlen,depth);
  549         return ipstr(addr1,sizeof(addr1),depth1) + " " + ipstr(addr2,sizeof(addr2),depth2);
  550     }
  551 
  552     /* 2tree needs its own dump because a different ipstr is called */
  553     std::ostream & dump(std::ostream &os) const {
  554         histogram_t histogram;
  555         get_histogram(histogram);
  556         os << "nodes: " << nodes << "  histogram size: " << histogram.size() << "\n";
  557         for(size_t i=0;i<histogram.size();i++){
  558             const addr_elem &a = histogram.at(i);
  559             os << ip2str(a.addr,sizeof(a.addr),a.depth) << "  count=" << histogram.at(i).count << "\n";
  560         }
  561         return os;
  562     }
  563 
  564     /* Add a pair of addresses by interleaving them */
  565     void add_pair(const uint8_t *addr1,const uint8_t *addr2,size_t addrlen,uint64_t val){
  566         uint8_t addr[32];
  567         memset(addr,0,sizeof(addr));
  568         /* Interleave on the bit by bit level */
  569         for(size_t i=0;i<addrlen*8;i++){
  570             if(iptreet<uint64_t,32>::bit(addr1,i))
  571                 iptreet<uint64_t,32>::setbit(addr,sizeof(addr),i*2);
  572             if(iptreet<uint64_t,32>::bit(addr2,i))
  573                 iptreet<uint64_t,32>::setbit(addr,sizeof(addr),i*2+1);
  574         }
  575         add(addr,addrlen*2,val); /* Add it */
  576     }
  577 
  578 };
  579 
  580 typedef iptreet<uint64_t,16> iptree;       // simple tree for counting; reimplement so val is tcount
  581 template <typename T,size_t ADDRBYTES> std::ostream & operator <<(std::ostream &os,const iptreet<T,ADDRBYTES> &ipt) {
  582     return ipt.dump(os);
  583 }
  584 
  585 inline std::ostream & operator <<(std::ostream &os,const ip2tree &ipt) {
  586     return ipt.dump(os);
  587 }
  588 
  589 
  590 #endif