"Fossies" - the Fresh Open Source Software Archive

Member "daq-2.0.7/sfbpf/sf_optimize.c" (8 Apr 2020, 59876 Bytes) of package /linux/misc/daq-2.0.7.tar.gz:


    1 /*
    2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
    3  *  The Regents of the University of California.  All rights reserved.
    4  *
    5  * Some portions Copyright (C) 2010-2013 Sourcefire, Inc.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that: (1) source code distributions
    9  * retain the above copyright notice and this paragraph in its entirety, (2)
   10  * distributions including binary code include the above copyright notice and
   11  * this paragraph in its entirety in the documentation or other materials
   12  * provided with the distribution, and (3) all advertising materials mentioning
   13  * features or use of this software display the following acknowledgement:
   14  * ``This product includes software developed by the University of California,
   15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
   16  * the University nor the names of its contributors may be used to endorse
   17  * or promote products derived from this software without specific prior
   18  * written permission.
   19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
   20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
   21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
   22  *
   23  *  Optimization module for tcpdump intermediate representation.
   24  */
   25 #ifndef lint
   26 static const char rcsid[] =
   27     "@(#) $Header: //depot/firepower/daq-opensource/DAQ_2_0_7/sfbpf/sf_optimize.c#1 $ (LBL)";
   28 #endif
   29 
   30 #ifdef HAVE_CONFIG_H
   31 #include "config.h"
   32 #endif
   33 
   34 #ifdef WIN32
   35 #include "win32-stdinc.h"
   36 #else /* WIN32 */
   37 #if HAVE_INTTYPES_H
   38 #include <inttypes.h>
   39 #elif HAVE_STDINT_H
   40 #include <stdint.h>
   41 #endif
   42 #ifdef HAVE_SYS_BITYPES_H
   43 #include <sys/bitypes.h>
   44 #endif
   45 #include <sys/types.h>
   46 #endif /* WIN32 */
   47 
   48 #include <stdio.h>
   49 #include <stdlib.h>
   50 #include <memory.h>
   51 #include <string.h>
   52 
   53 #include <errno.h>
   54 
   55 #include "sfbpf-int.h"
   56 
   57 #include "gencode.h"
   58 
   59 #ifdef BDEBUG
   60 extern int dflag;
   61 #endif
   62 
   63 #if defined(MSDOS) && !defined(__DJGPP__)
   64 extern int _w32_ffs(int mask);
   65 #define ffs _w32_ffs
   66 #endif
   67 
   68 #if defined(WIN32) && defined (_MSC_VER)
   69 int ffs(int mask);
   70 #endif
   71 
   72 /*
   73  * Represents a deleted instruction.
   74  */
   75 #define NOP -1
   76 
   77 /*
   78  * Register numbers for use-def values.
   79  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
   80  * location.  A_ATOM is the accumulator and X_ATOM is the index
   81  * register.
   82  */
   83 #define A_ATOM BPF_MEMWORDS
   84 #define X_ATOM (BPF_MEMWORDS+1)
   85 
   86 /*
   87  * This define is used to represent *both* the accumulator and
   88  * x register in use-def computations.
   89  * Currently, the use-def code assumes only one definition per instruction.
   90  */
   91 #define AX_ATOM N_ATOMS
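       /*
        * Illustrative note: assuming the usual definitions (BPF_MEMWORDS is
        * 16 in bpf.h and gencode.h defines N_ATOMS as BPF_MEMWORDS+2), the
        * atom numbering works out to:
        *
        *   0..15   scratch memory cells M[0]..M[15]
        *   16      A_ATOM (the accumulator)
        *   17      X_ATOM (the index register)
        *   18      AX_ATOM (A and X together; only used on the "use" side)
        */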
   92 
   93 /*
   94  * A flag to indicate that further optimization is needed.
   95  * Iterative passes are continued until a given pass yields no
   96  * branch movement.
   97  */
   98 static int done;
   99 
  100 /*
   101  * A block is marked if and only if its mark equals the current mark.
  102  * Rather than traverse the code array, marking each item, 'cur_mark' is
  103  * incremented.  This automatically makes each element unmarked.
  104  */
  105 static int cur_mark;
  106 #define isMarked(p) ((p)->mark == cur_mark)
  107 #define unMarkAll() cur_mark += 1
  108 #define Mark(p) ((p)->mark = cur_mark)
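       /*
        * Example of the marking idiom used by the traversals below:
        * unMarkAll() only bumps cur_mark, which instantly invalidates every
        * block's old mark, so a pass over the graph typically looks like
        *
        *   unMarkAll();
        *   ...
        *   if (isMarked(b))    // already visited during this traversal
        *       return;
        *   Mark(b);            // stamp 'b' with the current mark
        */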
  109 
  110 static void opt_init(struct block *);
  111 static void opt_cleanup(void);
  112 
  113 static void make_marks(struct block *);
  114 static void mark_code(struct block *);
  115 
  116 static void intern_blocks(struct block *);
  117 
  118 static int eq_slist(struct slist *, struct slist *);
  119 
  120 static void find_levels_r(struct block *);
  121 
  122 static void find_levels(struct block *);
  123 static void find_dom(struct block *);
  124 static void propedom(struct edge *);
  125 static void find_edom(struct block *);
  126 static void find_closure(struct block *);
  127 static int atomuse(struct stmt *);
  128 static int atomdef(struct stmt *);
  129 static void compute_local_ud(struct block *);
  130 static void find_ud(struct block *);
  131 static void init_val(void);
  132 static int F(int, int, int);
  133 static inline void vstore(struct stmt *, int *, int, int);
  134 static void opt_blk(struct block *, int);
  135 static int use_conflict(struct block *, struct block *);
  136 static void opt_j(struct edge *);
  137 static void or_pullup(struct block *);
  138 static void and_pullup(struct block *);
  139 static void opt_blks(struct block *, int);
  140 static inline void link_inedge(struct edge *, struct block *);
  141 static void find_inedges(struct block *);
  142 static void opt_root(struct block **);
  143 static void opt_loop(struct block *, int);
  144 static void fold_op(struct stmt *, int, int);
  145 static inline struct slist *this_op(struct slist *);
  146 static void opt_not(struct block *);
  147 static void opt_peep(struct block *);
  148 static void opt_stmt(struct stmt *, int[], int);
  149 static void deadstmt(struct stmt *, struct stmt *[]);
  150 static void opt_deadstores(struct block *);
  151 static struct block *fold_edge(struct block *, struct edge *);
  152 static inline int eq_blk(struct block *, struct block *);
  153 static int slength(struct slist *);
  154 static int count_blocks(struct block *);
  155 static void number_blks_r(struct block *);
  156 static int count_stmts(struct block *);
  157 static int convert_code_r(struct block *);
  158 #ifdef BDEBUG
  159 static void opt_dump(struct block *);
  160 #endif
  161 
  162 static int n_blocks;
  163 struct block **blocks;
  164 static int n_edges;
  165 struct edge **edges;
  166 
  167 /*
  168  * A bit vector set representation of the dominators.
  169  * We round up the set size to the next power of two.
  170  */
  171 static int nodewords;
  172 static int edgewords;
  173 struct block **levels;
  174 bpf_u_int32 *space;
  175 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
  176 /*
   177  * True if 'a' is in uset p.
  178  */
  179 #define SET_MEMBER(p, a) \
  180 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
  181 
  182 /*
  183  * Add 'a' to uset p.
  184  */
  185 #define SET_INSERT(p, a) \
  186 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
  187 
  188 /*
  189  * Delete 'a' from uset p.
  190  */
  191 #define SET_DELETE(p, a) \
  192 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
  193 
  194 /*
  195  * a := a intersect b
  196  */
  197 #define SET_INTERSECT(a, b, n)\
  198 {\
  199     register bpf_u_int32 *_x = a, *_y = b;\
  200     register int _n = n;\
  201     while (--_n >= 0) *_x++ &= *_y++;\
  202 }
  203 
  204 /*
  205  * a := a - b
  206  */
  207 #define SET_SUBTRACT(a, b, n)\
  208 {\
  209     register bpf_u_int32 *_x = a, *_y = b;\
  210     register int _n = n;\
  211     while (--_n >= 0) *_x++ &=~ *_y++;\
  212 }
  213 
  214 /*
  215  * a := a union b
  216  */
  217 #define SET_UNION(a, b, n)\
  218 {\
  219     register bpf_u_int32 *_x = a, *_y = b;\
  220     register int _n = n;\
  221     while (--_n >= 0) *_x++ |= *_y++;\
  222 }
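       /*
        * Usage sketch for the uset macros above, assuming bpf_u_int32 is
        * 32 bits so BITS_PER_WORD is 32: element 37 lives in word 1, bit 5.
        *
        *   bpf_u_int32 set[2] = { 0, 0 };
        *   SET_INSERT(set, 37);              // set[1] |= 1 << 5
        *   if (SET_MEMBER(set, 37)) ...      // non-zero
        *   SET_DELETE(set, 37);              // set[1] &= ~(1 << 5)
        *
        * The word-wise SET_INTERSECT/SET_SUBTRACT/SET_UNION loops take an
        * explicit word count: nodewords for block sets, edgewords for edge
        * sets.
        */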
  223 
  224 static uset all_dom_sets;
  225 static uset all_closure_sets;
  226 static uset all_edge_sets;
  227 
  228 #ifndef MAX
  229 #define MAX(a,b) ((a)>(b)?(a):(b))
  230 #endif
  231 
  232 static void find_levels_r(b)
  233      struct block *b;
  234 {
  235     int level;
  236 
  237     if (isMarked(b))
  238         return;
  239 
  240     Mark(b);
  241     b->link = 0;
  242 
  243     if (JT(b))
  244     {
  245         find_levels_r(JT(b));
  246         find_levels_r(JF(b));
  247         level = MAX(JT(b)->level, JF(b)->level) + 1;
  248     }
  249     else
  250         level = 0;
  251     b->level = level;
  252     b->link = levels[level];
  253     levels[level] = b;
  254 }
  255 
  256 /*
  257  * Level graph.  The levels go from 0 at the leaves to
  258  * N_LEVELS at the root.  The levels[] array points to the
  259  * first node of the level list, whose elements are linked
  260  * with the 'link' field of the struct block.
  261  */
  262 static void find_levels(root)
  263      struct block *root;
  264 {
  265     memset((char *) levels, 0, n_blocks * sizeof(*levels));
  266     unMarkAll();
  267     find_levels_r(root);
  268 }
  269 
  270 /*
  271  * Find dominator relationships.
  272  * Assumes graph has been leveled.
  273  */
  274 static void find_dom(root)
  275      struct block *root;
  276 {
  277     int i;
  278     struct block *b;
  279     bpf_u_int32 *x;
  280 
  281     /*
  282      * Initialize sets to contain all nodes.
  283      */
  284     x = all_dom_sets;
  285     i = n_blocks * nodewords;
  286     while (--i >= 0)
  287         *x++ = ~0;
  288     /* Root starts off empty. */
  289     for (i = nodewords; --i >= 0;)
  290         root->dom[i] = 0;
  291 
   292     /* root->level is the highest level number found. */
  293     for (i = root->level; i >= 0; --i)
  294     {
  295         for (b = levels[i]; b; b = b->link)
  296         {
  297             SET_INSERT(b->dom, b->id);
  298             if (JT(b) == 0)
  299                 continue;
  300             SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
  301             SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
  302         }
  303     }
  304 }
  305 
  306 static void propedom(ep)
  307      struct edge *ep;
  308 {
  309     SET_INSERT(ep->edom, ep->id);
  310     if (ep->succ)
  311     {
  312         SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
  313         SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
  314     }
  315 }
  316 
  317 /*
  318  * Compute edge dominators.
  319  * Assumes graph has been leveled and predecessors established.
  320  */
  321 static void find_edom(root)
  322      struct block *root;
  323 {
  324     int i;
  325     uset x;
  326     struct block *b;
  327 
  328     x = all_edge_sets;
  329     for (i = n_edges * edgewords; --i >= 0;)
  330         x[i] = ~0;
  331 
   332     /* root->level is the highest level number found. */
  333     memset(root->et.edom, 0, edgewords * sizeof(*(uset) 0));
  334     memset(root->ef.edom, 0, edgewords * sizeof(*(uset) 0));
  335     for (i = root->level; i >= 0; --i)
  336     {
  337         for (b = levels[i]; b != 0; b = b->link)
  338         {
  339             propedom(&b->et);
  340             propedom(&b->ef);
  341         }
  342     }
  343 }
  344 
  345 /*
  346  * Find the backwards transitive closure of the flow graph.  These sets
  347  * are backwards in the sense that we find the set of nodes that reach
  348  * a given node, not the set of nodes that can be reached by a node.
  349  *
  350  * Assumes graph has been leveled.
  351  */
  352 static void find_closure(root)
  353      struct block *root;
  354 {
  355     int i;
  356     struct block *b;
  357 
  358     /*
  359      * Initialize sets to contain no nodes.
  360      */
  361     memset((char *) all_closure_sets, 0, n_blocks * nodewords * sizeof(*all_closure_sets));
  362 
   363     /* root->level is the highest level number found. */
  364     for (i = root->level; i >= 0; --i)
  365     {
  366         for (b = levels[i]; b; b = b->link)
  367         {
  368             SET_INSERT(b->closure, b->id);
  369             if (JT(b) == 0)
  370                 continue;
  371             SET_UNION(JT(b)->closure, b->closure, nodewords);
  372             SET_UNION(JF(b)->closure, b->closure, nodewords);
  373         }
  374     }
  375 }
  376 
  377 /*
  378  * Return the register number that is used by s.  If A and X are both
  379  * used, return AX_ATOM.  If no register is used, return -1.
  380  *
  381  * The implementation should probably change to an array access.
  382  */
  383 static int atomuse(s)
  384      struct stmt *s;
  385 {
  386     register int c = s->code;
  387 
  388     if (c == NOP)
  389         return -1;
  390 
  391     switch (BPF_CLASS(c))
  392     {
  393 
  394         case BPF_RET:
  395             return (BPF_RVAL(c) == BPF_A) ? A_ATOM : (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
  396 
  397         case BPF_LD:
  398         case BPF_LDX:
  399             return (BPF_MODE(c) == BPF_IND) ? X_ATOM : (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
  400 
  401         case BPF_ST:
  402             return A_ATOM;
  403 
  404         case BPF_STX:
  405             return X_ATOM;
  406 
  407         case BPF_JMP:
  408         case BPF_ALU:
  409             if (BPF_SRC(c) == BPF_X)
  410                 return AX_ATOM;
  411             return A_ATOM;
  412 
  413         case BPF_MISC:
  414             return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
  415     }
  416     abort();
  417     /* NOTREACHED */
  418 }
  419 
  420 /*
  421  * Return the register number that is defined by 's'.  We assume that
  422  * a single stmt cannot define more than one register.  If no register
  423  * is defined, return -1.
  424  *
  425  * The implementation should probably change to an array access.
  426  */
  427 static int atomdef(s)
  428      struct stmt *s;
  429 {
  430     if (s->code == NOP)
  431         return -1;
  432 
  433     switch (BPF_CLASS(s->code))
  434     {
  435 
  436         case BPF_LD:
  437         case BPF_ALU:
  438             return A_ATOM;
  439 
  440         case BPF_LDX:
  441             return X_ATOM;
  442 
  443         case BPF_ST:
  444         case BPF_STX:
  445             return s->k;
  446 
  447         case BPF_MISC:
  448             return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
  449     }
  450     return -1;
  451 }
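       /*
        * A few concrete cases, as a sketch of how the two routines above
        * classify instructions:
        *
        *   ld  [12]   (BPF_LD|BPF_ABS|BPF_W)    uses nothing (-1), defines A_ATOM
        *   ldx M[3]   (BPF_LDX|BPF_MEM)         uses atom 3,       defines X_ATOM
        *   add x      (BPF_ALU|BPF_ADD|BPF_X)   uses AX_ATOM,      defines A_ATOM
        *   st  M[5]   (BPF_ST)                  uses A_ATOM,       defines atom 5
        */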
  452 
  453 /*
  454  * Compute the sets of registers used, defined, and killed by 'b'.
  455  *
  456  * "Used" means that a statement in 'b' uses the register before any
  457  * statement in 'b' defines it, i.e. it uses the value left in
  458  * that register by a predecessor block of this block.
  459  * "Defined" means that a statement in 'b' defines it.
  460  * "Killed" means that a statement in 'b' defines it before any
  461  * statement in 'b' uses it, i.e. it kills the value left in that
  462  * register by a predecessor block of this block.
  463  */
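       /*
        * Worked example (a sketch, ignoring the block's final branch): for a
        * block containing
        *
        *   ldx M[1]; add x; st M[2]
        *
        * the loop below computes in_use = {M[1], A}, def = {X, A, M[2]} and
        * kill = {X, M[2]}: M[1] and the incoming accumulator are read before
        * being written, while X and M[2] are overwritten before any use, so
        * values left in them by predecessors never survive this block.
        */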
  464 static void compute_local_ud(b)
  465      struct block *b;
  466 {
  467     struct slist *s;
  468     atomset def = 0, use = 0, kill = 0;
  469     int atom;
  470 
  471     for (s = b->stmts; s; s = s->next)
  472     {
  473         if (s->s.code == NOP)
  474             continue;
  475         atom = atomuse(&s->s);
  476         if (atom >= 0)
  477         {
  478             if (atom == AX_ATOM)
  479             {
  480                 if (!ATOMELEM(def, X_ATOM))
  481                     use |= ATOMMASK(X_ATOM);
  482                 if (!ATOMELEM(def, A_ATOM))
  483                     use |= ATOMMASK(A_ATOM);
  484             }
  485             else if (atom < N_ATOMS)
  486             {
  487                 if (!ATOMELEM(def, atom))
  488                     use |= ATOMMASK(atom);
  489             }
  490             else
  491                 abort();
  492         }
  493         atom = atomdef(&s->s);
  494         if (atom >= 0)
  495         {
  496             if (!ATOMELEM(use, atom))
  497                 kill |= ATOMMASK(atom);
  498             def |= ATOMMASK(atom);
  499         }
  500     }
  501     if (BPF_CLASS(b->s.code) == BPF_JMP)
  502     {
  503         /*
  504          * XXX - what about RET?
  505          */
  506         atom = atomuse(&b->s);
  507         if (atom >= 0)
  508         {
  509             if (atom == AX_ATOM)
  510             {
  511                 if (!ATOMELEM(def, X_ATOM))
  512                     use |= ATOMMASK(X_ATOM);
  513                 if (!ATOMELEM(def, A_ATOM))
  514                     use |= ATOMMASK(A_ATOM);
  515             }
  516             else if (atom < N_ATOMS)
  517             {
  518                 if (!ATOMELEM(def, atom))
  519                     use |= ATOMMASK(atom);
  520             }
  521             else
  522                 abort();
  523         }
  524     }
  525 
  526     b->def = def;
  527     b->kill = kill;
  528     b->in_use = use;
  529 }
  530 
  531 /*
  532  * Assume graph is already leveled.
  533  */
  534 static void find_ud(root)
  535      struct block *root;
  536 {
  537     int i, maxlevel;
  538     struct block *p;
  539 
  540     /*
   541      * root->level is the highest level number found;
  542      * count down from there.
  543      */
  544     maxlevel = root->level;
  545     for (i = maxlevel; i >= 0; --i)
  546         for (p = levels[i]; p; p = p->link)
  547         {
  548             compute_local_ud(p);
  549             p->out_use = 0;
  550         }
  551 
  552     for (i = 1; i <= maxlevel; ++i)
  553     {
  554         for (p = levels[i]; p; p = p->link)
  555         {
  556             p->out_use |= JT(p)->in_use | JF(p)->in_use;
  557             p->in_use |= p->out_use & ~p->kill;
  558         }
  559     }
  560 }
  561 
  562 /*
   563  * These data structures are used in a Cocke and Schwartz style
  564  * value numbering scheme.  Since the flowgraph is acyclic,
  565  * exit values can be propagated from a node's predecessors
  566  * provided it is uniquely defined.
  567  */
  568 struct valnode
  569 {
  570     int code;
  571     int v0, v1;
  572     int val;
  573     struct valnode *next;
  574 };
  575 
  576 #define MODULUS 213
  577 static struct valnode *hashtbl[MODULUS];
  578 static int curval;
  579 static int maxval;
  580 
  581 /* Integer constants mapped with the load immediate opcode. */
  582 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
  583 
  584 struct vmapinfo
  585 {
  586     int is_const;
  587     bpf_int32 const_val;
  588 };
  589 
  590 struct vmapinfo *vmap;
  591 struct valnode *vnode_base;
  592 struct valnode *next_vnode;
  593 
  594 static void init_val()
  595 {
  596     curval = 0;
  597     next_vnode = vnode_base;
  598     memset((char *) vmap, 0, maxval * sizeof(*vmap));
  599     memset((char *) hashtbl, 0, sizeof hashtbl);
  600 }
  601 
  602 /* Because we really don't have an IR, this stuff is a little messy. */
  603 static int F(code, v0, v1)
  604      int code;
  605      int v0, v1;
  606 {
  607     u_int hash;
  608     int val;
  609     struct valnode *p;
  610 
  611     hash = (u_int) code ^ (v0 << 4) ^ (v1 << 8);
  612     hash %= MODULUS;
  613 
  614     for (p = hashtbl[hash]; p; p = p->next)
  615         if (p->code == code && p->v0 == v0 && p->v1 == v1)
  616             return p->val;
  617 
  618     val = ++curval;
  619     if (BPF_MODE(code) == BPF_IMM && (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX))
  620     {
  621         vmap[val].const_val = v0;
  622         vmap[val].is_const = 1;
  623     }
  624     p = next_vnode++;
  625     p->val = val;
  626     p->code = code;
  627     p->v0 = v0;
  628     p->v1 = v1;
  629     p->next = hashtbl[hash];
  630     hashtbl[hash] = p;
  631 
  632     return val;
  633 }
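       /*
        * Sketch of how the value numbers are used: F() hands out one number
        * per distinct (code, v0, v1) triple, so for example
        *
        *   v = F(BPF_LD|BPF_ABS|BPF_H, 12, 0L);   // "ldh [12]"
        *   w = F(BPF_LD|BPF_ABS|BPF_H, 12, 0L);   // same triple, so w == v
        *
        * and vstore() below can turn a statement into a NOP when it would
        * only recreate the value already known to be in the target register.
        * K(i) goes through the BPF_IMM branch above, so vmap[] also records
        * that the value is the constant i; fold_op() and opt_peep() rely on
        * that.
        */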
  634 
  635 static inline void vstore(s, valp, newval, alter)
  636      struct stmt *s;
  637      int *valp;
  638      int newval;
  639      int alter;
  640 {
  641     if (alter && *valp == newval)
  642         s->code = NOP;
  643     else
  644         *valp = newval;
  645 }
  646 
  647 static void fold_op(s, v0, v1)
  648      struct stmt *s;
  649      int v0, v1;
  650 {
  651     bpf_u_int32 a, b;
  652 
  653     a = vmap[v0].const_val;
  654     b = vmap[v1].const_val;
  655 
  656     switch (BPF_OP(s->code))
  657     {
  658         case BPF_ADD:
  659             a += b;
  660             break;
  661 
  662         case BPF_SUB:
  663             a -= b;
  664             break;
  665 
  666         case BPF_MUL:
  667             a *= b;
  668             break;
  669 
  670         case BPF_DIV:
  671             if (b == 0)
  672                 bpf_error("division by zero");
  673             a /= b;
  674             break;
  675 
  676         case BPF_AND:
  677             a &= b;
  678             break;
  679 
  680         case BPF_OR:
  681             a |= b;
  682             break;
  683 
  684         case BPF_LSH:
  685             a <<= b;
  686             break;
  687 
  688         case BPF_RSH:
  689             a >>= b;
  690             break;
  691 
  692         case BPF_NEG:
  693             a = -a;
  694             break;
  695 
  696         default:
  697             abort();
  698     }
  699     s->k = a;
  700     s->code = BPF_LD | BPF_IMM;
  701     done = 0;
  702 }
  703 
  704 static inline struct slist *this_op(s)
  705      struct slist *s;
  706 {
  707     while (s != 0 && s->s.code == NOP)
  708         s = s->next;
  709     return s;
  710 }
  711 
  712 static void opt_not(b)
  713      struct block *b;
  714 {
  715     struct block *tmp = JT(b);
  716 
  717     JT(b) = JF(b);
  718     JF(b) = tmp;
  719 }
  720 
  721 static void opt_peep(b)
  722      struct block *b;
  723 {
  724     struct slist *s;
  725     struct slist *next, *last;
  726     int val;
  727 
  728     s = b->stmts;
  729     if (s == 0)
  730         return;
  731 
  732     last = s;
  733     for ( /*empty */ ; /*empty */ ; s = next)
  734     {
  735         /*
  736          * Skip over nops.
  737          */
  738         s = this_op(s);
  739         if (s == 0)
  740             break;              /* nothing left in the block */
  741 
  742         /*
  743          * Find the next real instruction after that one
  744          * (skipping nops).
  745          */
  746         next = this_op(s->next);
  747         if (next == 0)
  748             break;              /* no next instruction */
  749         last = next;
  750 
  751         /*
  752          * st  M[k] --> st  M[k]
  753          * ldx M[k]     tax
  754          */
  755         if (s->s.code == BPF_ST && next->s.code == (BPF_LDX | BPF_MEM) && s->s.k == next->s.k)
  756         {
  757             done = 0;
  758             next->s.code = BPF_MISC | BPF_TAX;
  759         }
  760         /*
  761          * ld  #k   --> ldx  #k
  762          * tax          txa
  763          */
  764         if (s->s.code == (BPF_LD | BPF_IMM) && next->s.code == (BPF_MISC | BPF_TAX))
  765         {
  766             s->s.code = BPF_LDX | BPF_IMM;
  767             next->s.code = BPF_MISC | BPF_TXA;
  768             done = 0;
  769         }
  770         /*
  771          * This is an ugly special case, but it happens
  772          * when you say tcp[k] or udp[k] where k is a constant.
  773          */
  774         if (s->s.code == (BPF_LD | BPF_IMM))
  775         {
  776             struct slist *add, *tax, *ild;
  777 
  778             /*
  779              * Check that X isn't used on exit from this
  780              * block (which the optimizer might cause).
  781              * We know the code generator won't generate
  782              * any local dependencies.
  783              */
  784             if (ATOMELEM(b->out_use, X_ATOM))
  785                 continue;
  786 
  787             /*
  788              * Check that the instruction following the ldi
  789              * is an addx, or it's an ldxms with an addx
  790              * following it (with 0 or more nops between the
  791              * ldxms and addx).
  792              */
  793             if (next->s.code != (BPF_LDX | BPF_MSH | BPF_B))
  794                 add = next;
  795             else
  796                 add = this_op(next->next);
  797             if (add == 0 || add->s.code != (BPF_ALU | BPF_ADD | BPF_X))
  798                 continue;
  799 
  800             /*
  801              * Check that a tax follows that (with 0 or more
  802              * nops between them).
  803              */
  804             tax = this_op(add->next);
  805             if (tax == 0 || tax->s.code != (BPF_MISC | BPF_TAX))
  806                 continue;
  807 
  808             /*
  809              * Check that an ild follows that (with 0 or more
  810              * nops between them).
  811              */
  812             ild = this_op(tax->next);
  813             if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || BPF_MODE(ild->s.code) != BPF_IND)
  814                 continue;
  815             /*
  816              * We want to turn this sequence:
  817              *
  818              * (004) ldi     #0x2       {s}
  819              * (005) ldxms   [14]       {next}  -- optional
  820              * (006) addx           {add}
  821              * (007) tax            {tax}
  822              * (008) ild     [x+0]      {ild}
  823              *
  824              * into this sequence:
  825              *
  826              * (004) nop
  827              * (005) ldxms   [14]
  828              * (006) nop
  829              * (007) nop
  830              * (008) ild     [x+2]
  831              *
  832              * XXX We need to check that X is not
  833              * subsequently used, because we want to change
  834              * what'll be in it after this sequence.
  835              *
  836              * We know we can eliminate the accumulator
  837              * modifications earlier in the sequence since
  838              * it is defined by the last stmt of this sequence
  839              * (i.e., the last statement of the sequence loads
  840              * a value into the accumulator, so we can eliminate
  841              * earlier operations on the accumulator).
  842              */
  843             ild->s.k += s->s.k;
  844             s->s.code = NOP;
  845             add->s.code = NOP;
  846             tax->s.code = NOP;
  847             done = 0;
  848         }
  849     }
  850     /*
  851      * If the comparison at the end of a block is an equality
  852      * comparison against a constant, and nobody uses the value
  853      * we leave in the A register at the end of a block, and
  854      * the operation preceding the comparison is an arithmetic
   855      * operation, we can sometimes optimize it away.
  856      */
  857     if (b->s.code == (BPF_JMP | BPF_JEQ | BPF_K) && !ATOMELEM(b->out_use, A_ATOM))
  858     {
  859         /*
  860          * We can optimize away certain subtractions of the
  861          * X register.
  862          */
  863         if (last->s.code == (BPF_ALU | BPF_SUB | BPF_X))
  864         {
  865             val = b->val[X_ATOM];
  866             if (vmap[val].is_const)
  867             {
  868                 /*
  869                  * If we have a subtract to do a comparison,
  870                  * and the X register is a known constant,
  871                  * we can merge this value into the
  872                  * comparison:
  873                  *
  874                  * sub x  ->    nop
  875                  * jeq #y   jeq #(x+y)
  876                  */
  877                 b->s.k += vmap[val].const_val;
  878                 last->s.code = NOP;
  879                 done = 0;
  880             }
  881             else if (b->s.k == 0)
  882             {
  883                 /*
  884                  * If the X register isn't a constant,
  885                  * and the comparison in the test is
  886                  * against 0, we can compare with the
  887                  * X register, instead:
  888                  *
  889                  * sub x  ->    nop
  890                  * jeq #0   jeq x
  891                  */
  892                 last->s.code = NOP;
  893                 b->s.code = BPF_JMP | BPF_JEQ | BPF_X;
  894                 done = 0;
  895             }
  896         }
  897         /*
  898          * Likewise, a constant subtract can be simplified:
  899          *
  900          * sub #x ->    nop
  901          * jeq #y ->    jeq #(x+y)
  902          */
  903         else if (last->s.code == (BPF_ALU | BPF_SUB | BPF_K))
  904         {
  905             last->s.code = NOP;
  906             b->s.k += last->s.k;
  907             done = 0;
  908         }
  909         /*
  910          * And, similarly, a constant AND can be simplified
  911          * if we're testing against 0, i.e.:
  912          *
  913          * and #k   nop
  914          * jeq #0  ->   jset #k
  915          */
  916         else if (last->s.code == (BPF_ALU | BPF_AND | BPF_K) && b->s.k == 0)
  917         {
  918             b->s.k = last->s.k;
  919             b->s.code = BPF_JMP | BPF_K | BPF_JSET;
  920             last->s.code = NOP;
  921             done = 0;
  922             opt_not(b);
  923         }
  924     }
  925     /*
  926      * jset #0        ->   never
  927      * jset #ffffffff ->   always
  928      */
  929     if (b->s.code == (BPF_JMP | BPF_K | BPF_JSET))
  930     {
  931         if (b->s.k == 0)
  932             JT(b) = JF(b);
  933         if (b->s.k == 0xffffffff)
  934             JF(b) = JT(b);
  935     }
  936     /*
  937      * If we're comparing against the index register, and the index
  938      * register is a known constant, we can just compare against that
  939      * constant.
  940      */
  941     val = b->val[X_ATOM];
  942     if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X)
  943     {
  944         bpf_int32 v = vmap[val].const_val;
  945         b->s.code &= ~BPF_X;
  946         b->s.k = v;
  947     }
  948     /*
  949      * If the accumulator is a known constant, we can compute the
  950      * comparison result.
  951      */
  952     val = b->val[A_ATOM];
  953     if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K)
  954     {
  955         bpf_int32 v = vmap[val].const_val;
  956         switch (BPF_OP(b->s.code))
  957         {
  958 
  959             case BPF_JEQ:
  960                 v = v == b->s.k;
  961                 break;
  962 
  963             case BPF_JGT:
  964                 v = (unsigned) v > b->s.k;
  965                 break;
  966 
  967             case BPF_JGE:
  968                 v = (unsigned) v >= b->s.k;
  969                 break;
  970 
  971             case BPF_JSET:
  972                 v &= b->s.k;
  973                 break;
  974 
  975             default:
  976                 abort();
  977         }
  978         if (JF(b) != JT(b))
  979             done = 0;
  980         if (v)
  981             JF(b) = JT(b);
  982         else
  983             JT(b) = JF(b);
  984     }
  985 }
  986 
  987 /*
   988  * Compute the symbolic value of the expression in 's', and update
  989  * anything it defines in the value table 'val'.  If 'alter' is true,
  990  * do various optimizations.  This code would be cleaner if symbolic
  991  * evaluation and code transformations weren't folded together.
  992  */
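       /*
        * Example (sketch) of a rewrite performed below when 'alter' is set:
        * if the X register is known to hold the constant 14, the BPF_IND
        * case turns
        *
        *   ldx #14
        *   ld  [x + 2]      into      ld [16]
        *
        * i.e. the indirect load becomes an absolute load with the constant
        * folded into the offset, and 'done' is cleared to force another
        * optimization pass.
        */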
  993 static void opt_stmt(s, val, alter)
  994      struct stmt *s;
  995      int val[];
  996      int alter;
  997 {
  998     int op;
  999     int v;
 1000 
 1001     switch (s->code)
 1002     {
 1003 
 1004         case BPF_LD | BPF_ABS | BPF_W:
 1005         case BPF_LD | BPF_ABS | BPF_H:
 1006         case BPF_LD | BPF_ABS | BPF_B:
 1007             v = F(s->code, s->k, 0L);
 1008             vstore(s, &val[A_ATOM], v, alter);
 1009             break;
 1010 
 1011         case BPF_LD | BPF_IND | BPF_W:
 1012         case BPF_LD | BPF_IND | BPF_H:
 1013         case BPF_LD | BPF_IND | BPF_B:
 1014             v = val[X_ATOM];
 1015             if (alter && vmap[v].is_const)
 1016             {
 1017                 s->code = BPF_LD | BPF_ABS | BPF_SIZE(s->code);
 1018                 s->k += vmap[v].const_val;
 1019                 v = F(s->code, s->k, 0L);
 1020                 done = 0;
 1021             }
 1022             else
 1023                 v = F(s->code, s->k, v);
 1024             vstore(s, &val[A_ATOM], v, alter);
 1025             break;
 1026 
 1027         case BPF_LD | BPF_LEN:
 1028             v = F(s->code, 0L, 0L);
 1029             vstore(s, &val[A_ATOM], v, alter);
 1030             break;
 1031 
 1032         case BPF_LD | BPF_IMM:
 1033             v = K(s->k);
 1034             vstore(s, &val[A_ATOM], v, alter);
 1035             break;
 1036 
 1037         case BPF_LDX | BPF_IMM:
 1038             v = K(s->k);
 1039             vstore(s, &val[X_ATOM], v, alter);
 1040             break;
 1041 
 1042         case BPF_LDX | BPF_MSH | BPF_B:
 1043             v = F(s->code, s->k, 0L);
 1044             vstore(s, &val[X_ATOM], v, alter);
 1045             break;
 1046 
 1047         case BPF_ALU | BPF_NEG:
 1048             if (alter && vmap[val[A_ATOM]].is_const)
 1049             {
 1050                 s->code = BPF_LD | BPF_IMM;
 1051                 s->k = -vmap[val[A_ATOM]].const_val;
 1052                 val[A_ATOM] = K(s->k);
 1053             }
 1054             else
 1055                 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 1056             break;
 1057 
 1058         case BPF_ALU | BPF_ADD | BPF_K:
 1059         case BPF_ALU | BPF_SUB | BPF_K:
 1060         case BPF_ALU | BPF_MUL | BPF_K:
 1061         case BPF_ALU | BPF_DIV | BPF_K:
 1062         case BPF_ALU | BPF_AND | BPF_K:
 1063         case BPF_ALU | BPF_OR | BPF_K:
 1064         case BPF_ALU | BPF_LSH | BPF_K:
 1065         case BPF_ALU | BPF_RSH | BPF_K:
 1066             op = BPF_OP(s->code);
 1067             if (alter)
 1068             {
 1069                 if (s->k == 0)
 1070                 {
 1071                     /* don't optimize away "sub #0"
 1072                      * as it may be needed later to
 1073                      * fixup the generated math code */
 1074                     if (op == BPF_ADD || op == BPF_LSH || op == BPF_RSH || op == BPF_OR)
 1075                     {
 1076                         s->code = NOP;
 1077                         break;
 1078                     }
 1079                     if (op == BPF_MUL || op == BPF_AND)
 1080                     {
 1081                         s->code = BPF_LD | BPF_IMM;
 1082                         val[A_ATOM] = K(s->k);
 1083                         break;
 1084                     }
 1085                 }
 1086                 if (vmap[val[A_ATOM]].is_const)
 1087                 {
 1088                     fold_op(s, val[A_ATOM], K(s->k));
 1089                     val[A_ATOM] = K(s->k);
 1090                     break;
 1091                 }
 1092             }
 1093             val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 1094             break;
 1095 
 1096         case BPF_ALU | BPF_ADD | BPF_X:
 1097         case BPF_ALU | BPF_SUB | BPF_X:
 1098         case BPF_ALU | BPF_MUL | BPF_X:
 1099         case BPF_ALU | BPF_DIV | BPF_X:
 1100         case BPF_ALU | BPF_AND | BPF_X:
 1101         case BPF_ALU | BPF_OR | BPF_X:
 1102         case BPF_ALU | BPF_LSH | BPF_X:
 1103         case BPF_ALU | BPF_RSH | BPF_X:
 1104             op = BPF_OP(s->code);
 1105             if (alter && vmap[val[X_ATOM]].is_const)
 1106             {
 1107                 if (vmap[val[A_ATOM]].is_const)
 1108                 {
 1109                     fold_op(s, val[A_ATOM], val[X_ATOM]);
 1110                     val[A_ATOM] = K(s->k);
 1111                 }
 1112                 else
 1113                 {
 1114                     s->code = BPF_ALU | BPF_K | op;
 1115                     s->k = vmap[val[X_ATOM]].const_val;
 1116                     done = 0;
 1117                     val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 1118                 }
 1119                 break;
 1120             }
 1121             /*
 1122              * Check if we're doing something to an accumulator
 1123              * that is 0, and simplify.  This may not seem like
 1124              * much of a simplification but it could open up further
 1125              * optimizations.
 1126              * XXX We could also check for mul by 1, etc.
 1127              */
 1128             if (alter && vmap[val[A_ATOM]].is_const && vmap[val[A_ATOM]].const_val == 0)
 1129             {
 1130                 if (op == BPF_ADD || op == BPF_OR)
 1131                 {
 1132                     s->code = BPF_MISC | BPF_TXA;
 1133                     vstore(s, &val[A_ATOM], val[X_ATOM], alter);
 1134                     break;
 1135                 }
 1136                 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_AND || op == BPF_LSH || op == BPF_RSH)
 1137                 {
 1138                     s->code = BPF_LD | BPF_IMM;
 1139                     s->k = 0;
 1140                     vstore(s, &val[A_ATOM], K(s->k), alter);
 1141                     break;
 1142                 }
 1143                 else if (op == BPF_NEG)
 1144                 {
 1145                     s->code = NOP;
 1146                     break;
 1147                 }
 1148             }
 1149             val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
 1150             break;
 1151 
 1152         case BPF_MISC | BPF_TXA:
 1153             vstore(s, &val[A_ATOM], val[X_ATOM], alter);
 1154             break;
 1155 
 1156         case BPF_LD | BPF_MEM:
 1157             v = val[s->k];
 1158             if (alter && vmap[v].is_const)
 1159             {
 1160                 s->code = BPF_LD | BPF_IMM;
 1161                 s->k = vmap[v].const_val;
 1162                 done = 0;
 1163             }
 1164             vstore(s, &val[A_ATOM], v, alter);
 1165             break;
 1166 
 1167         case BPF_MISC | BPF_TAX:
 1168             vstore(s, &val[X_ATOM], val[A_ATOM], alter);
 1169             break;
 1170 
 1171         case BPF_LDX | BPF_MEM:
 1172             v = val[s->k];
 1173             if (alter && vmap[v].is_const)
 1174             {
 1175                 s->code = BPF_LDX | BPF_IMM;
 1176                 s->k = vmap[v].const_val;
 1177                 done = 0;
 1178             }
 1179             vstore(s, &val[X_ATOM], v, alter);
 1180             break;
 1181 
 1182         case BPF_ST:
 1183             vstore(s, &val[s->k], val[A_ATOM], alter);
 1184             break;
 1185 
 1186         case BPF_STX:
 1187             vstore(s, &val[s->k], val[X_ATOM], alter);
 1188             break;
 1189     }
 1190 }
 1191 
 1192 static void deadstmt(s, last)
 1193      register struct stmt *s;
 1194      register struct stmt *last[];
 1195 {
 1196     register int atom;
 1197 
 1198     atom = atomuse(s);
 1199     if (atom >= 0)
 1200     {
 1201         if (atom == AX_ATOM)
 1202         {
 1203             last[X_ATOM] = 0;
 1204             last[A_ATOM] = 0;
 1205         }
 1206         else
 1207             last[atom] = 0;
 1208     }
 1209     atom = atomdef(s);
 1210     if (atom >= 0)
 1211     {
 1212         if (last[atom])
 1213         {
 1214             done = 0;
 1215             last[atom]->code = NOP;
 1216         }
 1217         last[atom] = s;
 1218     }
 1219 }
 1220 
 1221 static void opt_deadstores(b)
 1222      register struct block *b;
 1223 {
 1224     register struct slist *s;
 1225     register int atom;
 1226     struct stmt *last[N_ATOMS];
 1227 
 1228     memset((char *) last, 0, sizeof last);
 1229 
 1230     for (s = b->stmts; s != 0; s = s->next)
 1231         deadstmt(&s->s, last);
 1232     deadstmt(&b->s, last);
 1233 
 1234     for (atom = 0; atom < N_ATOMS; ++atom)
 1235         if (last[atom] && !ATOMELEM(b->out_use, atom))
 1236         {
 1237             last[atom]->code = NOP;
 1238             done = 0;
 1239         }
 1240 }
 1241 
 1242 static void opt_blk(b, do_stmts)
 1243      struct block *b;
 1244      int do_stmts;
 1245 {
 1246     struct slist *s;
 1247     struct edge *p;
 1248     int i;
 1249     bpf_int32 aval, xval;
 1250 
 1251 #if 0
 1252     for (s = b->stmts; s && s->next; s = s->next)
 1253         if (BPF_CLASS(s->s.code) == BPF_JMP)
 1254         {
 1255             do_stmts = 0;
 1256             break;
 1257         }
 1258 #endif
 1259 
 1260     /*
 1261      * Initialize the atom values.
 1262      */
 1263     p = b->in_edges;
 1264     if (p == 0)
 1265     {
 1266         /*
 1267          * We have no predecessors, so everything is undefined
 1268          * upon entry to this block.
 1269          */
 1270         memset((char *) b->val, 0, sizeof(b->val));
 1271     }
 1272     else
 1273     {
 1274         /*
 1275          * Inherit values from our predecessors.
 1276          *
 1277          * First, get the values from the predecessor along the
 1278          * first edge leading to this node.
 1279          */
 1280         memcpy((char *) b->val, (char *) p->pred->val, sizeof(b->val));
 1281         /*
 1282          * Now look at all the other nodes leading to this node.
 1283          * If, for the predecessor along that edge, a register
 1284          * has a different value from the one we have (i.e.,
 1285          * control paths are merging, and the merging paths
 1286          * assign different values to that register), give the
 1287          * register the undefined value of 0.
 1288          */
 1289         while ((p = p->next) != NULL)
 1290         {
 1291             for (i = 0; i < N_ATOMS; ++i)
 1292                 if (b->val[i] != p->pred->val[i])
 1293                     b->val[i] = 0;
 1294         }
 1295     }
 1296     aval = b->val[A_ATOM];
 1297     xval = b->val[X_ATOM];
 1298     for (s = b->stmts; s; s = s->next)
 1299         opt_stmt(&s->s, b->val, do_stmts);
 1300 
 1301     /*
 1302      * This is a special case: if we don't use anything from this
 1303      * block, and we load the accumulator or index register with a
 1304      * value that is already there, or if this block is a return,
 1305      * eliminate all the statements.
 1306      *
 1307      * XXX - what if it does a store?
 1308      *
 1309      * XXX - why does it matter whether we use anything from this
 1310      * block?  If the accumulator or index register doesn't change
 1311      * its value, isn't that OK even if we use that value?
 1312      *
 1313      * XXX - if we load the accumulator with a different value,
 1314      * and the block ends with a conditional branch, we obviously
 1315      * can't eliminate it, as the branch depends on that value.
 1316      * For the index register, the conditional branch only depends
 1317      * on the index register value if the test is against the index
 1318      * register value rather than a constant; if nothing uses the
 1319      * value we put into the index register, and we're not testing
 1320      * against the index register's value, and there aren't any
 1321      * other problems that would keep us from eliminating this
 1322      * block, can we eliminate it?
 1323      */
 1324     if (do_stmts &&
 1325         ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
 1326           xval != 0 && b->val[X_ATOM] == xval) || BPF_CLASS(b->s.code) == BPF_RET))
 1327     {
 1328         if (b->stmts != 0)
 1329         {
 1330             b->stmts = 0;
 1331             done = 0;
 1332         }
 1333     }
 1334     else
 1335     {
 1336         opt_peep(b);
 1337         opt_deadstores(b);
 1338     }
 1339     /*
 1340      * Set up values for branch optimizer.
 1341      */
 1342     if (BPF_SRC(b->s.code) == BPF_K)
 1343         b->oval = K(b->s.k);
 1344     else
 1345         b->oval = b->val[X_ATOM];
 1346     b->et.code = b->s.code;
 1347     b->ef.code = -b->s.code;
 1348 }
 1349 
 1350 /*
  1351  * Return true if any register that is used on exit from 'succ' has
 1352  * an exit value that is different from the corresponding exit value
 1353  * from 'b'.
 1354  */
 1355 static int use_conflict(b, succ)
 1356      struct block *b, *succ;
 1357 {
 1358     int atom;
 1359     atomset use = succ->out_use;
 1360 
 1361     if (use == 0)
 1362         return 0;
 1363 
 1364     for (atom = 0; atom < N_ATOMS; ++atom)
 1365         if (ATOMELEM(use, atom))
 1366             if (b->val[atom] != succ->val[atom])
 1367                 return 1;
 1368     return 0;
 1369 }
 1370 
 1371 static struct block *fold_edge(child, ep)
 1372      struct block *child;
 1373      struct edge *ep;
 1374 {
 1375     int sense;
 1376     int aval0, aval1, oval0, oval1;
 1377     int code = ep->code;
 1378 
 1379     if (code < 0)
 1380     {
 1381         code = -code;
 1382         sense = 0;
 1383     }
 1384     else
 1385         sense = 1;
 1386 
 1387     if (child->s.code != code)
 1388         return 0;
 1389 
 1390     aval0 = child->val[A_ATOM];
 1391     oval0 = child->oval;
 1392     aval1 = ep->pred->val[A_ATOM];
 1393     oval1 = ep->pred->oval;
 1394 
 1395     if (aval0 != aval1)
 1396         return 0;
 1397 
 1398     if (oval0 == oval1)
 1399         /*
 1400          * The operands of the branch instructions are
 1401          * identical, so the result is true if a true
 1402          * branch was taken to get here, otherwise false.
 1403          */
 1404         return sense ? JT(child) : JF(child);
 1405 
 1406     if (sense && code == (BPF_JMP | BPF_JEQ | BPF_K))
 1407         /*
 1408          * At this point, we only know the comparison if we
 1409          * came down the true branch, and it was an equality
 1410          * comparison with a constant.
 1411          *
 1412          * I.e., if we came down the true branch, and the branch
 1413          * was an equality comparison with a constant, we know the
 1414          * accumulator contains that constant.  If we came down
 1415          * the false branch, or the comparison wasn't with a
 1416          * constant, we don't know what was in the accumulator.
 1417          *
 1418          * We rely on the fact that distinct constants have distinct
 1419          * value numbers.
 1420          */
 1421         return JF(child);
 1422 
 1423     return 0;
 1424 }
 1425 
 1426 static void opt_j(ep)
 1427      struct edge *ep;
 1428 {
 1429     register int i, k;
 1430     register struct block *target;
 1431 
 1432     if (JT(ep->succ) == 0)
 1433         return;
 1434 
 1435     if (JT(ep->succ) == JF(ep->succ))
 1436     {
 1437         /*
 1438          * Common branch targets can be eliminated, provided
 1439          * there is no data dependency.
 1440          */
 1441         if (!use_conflict(ep->pred, ep->succ->et.succ))
 1442         {
 1443             done = 0;
 1444             ep->succ = JT(ep->succ);
 1445         }
 1446     }
 1447     /*
 1448      * For each edge dominator that matches the successor of this
  1449      * edge, promote the edge successor to its grandchild.
 1450      *
  1451      * XXX We violate the set abstraction here in favor of a reasonably
 1452      * efficient loop.
 1453      */
 1454   top:
 1455     for (i = 0; i < edgewords; ++i)
 1456     {
 1457         register bpf_u_int32 x = ep->edom[i];
 1458 
 1459         while (x != 0)
 1460         {
 1461             k = ffs(x) - 1;
 1462             x &= ~(1 << k);
 1463             k += i * BITS_PER_WORD;
 1464 
 1465             target = fold_edge(ep->succ, edges[k]);
 1466             /*
 1467              * Check that there is no data dependency between
 1468              * nodes that will be violated if we move the edge.
 1469              */
 1470             if (target != 0 && !use_conflict(ep->pred, target))
 1471             {
 1472                 done = 0;
 1473                 ep->succ = target;
 1474                 if (JT(target) != 0)
 1475                     /*
 1476                      * Start over unless we hit a leaf.
 1477                      */
 1478                     goto top;
 1479                 return;
 1480             }
 1481         }
 1482     }
 1483 }
 1484 
 1485 
 1486 static void or_pullup(b)
 1487      struct block *b;
 1488 {
 1489     int val, at_top;
 1490     struct block *pull;
 1491     struct block **diffp, **samep;
 1492     struct edge *ep;
 1493 
 1494     ep = b->in_edges;
 1495     if (ep == 0)
 1496         return;
 1497 
 1498     /*
 1499      * Make sure each predecessor loads the same value.
 1500      * XXX why?
 1501      */
 1502     val = ep->pred->val[A_ATOM];
 1503     for (ep = ep->next; ep != 0; ep = ep->next)
 1504         if (val != ep->pred->val[A_ATOM])
 1505             return;
 1506 
 1507     if (JT(b->in_edges->pred) == b)
 1508         diffp = &JT(b->in_edges->pred);
 1509     else
 1510         diffp = &JF(b->in_edges->pred);
 1511 
 1512     at_top = 1;
 1513     while (1)
 1514     {
 1515         if (*diffp == 0)
 1516             return;
 1517 
 1518         if (JT(*diffp) != JT(b))
 1519             return;
 1520 
 1521         if (!SET_MEMBER((*diffp)->dom, b->id))
 1522             return;
 1523 
 1524         if ((*diffp)->val[A_ATOM] != val)
 1525             break;
 1526 
 1527         diffp = &JF(*diffp);
 1528         at_top = 0;
 1529     }
 1530     samep = &JF(*diffp);
 1531     while (1)
 1532     {
 1533         if (*samep == 0)
 1534             return;
 1535 
 1536         if (JT(*samep) != JT(b))
 1537             return;
 1538 
 1539         if (!SET_MEMBER((*samep)->dom, b->id))
 1540             return;
 1541 
 1542         if ((*samep)->val[A_ATOM] == val)
 1543             break;
 1544 
 1545         /* XXX Need to check that there are no data dependencies
  1546            between diffp and samep.  Currently, the code generator
 1547            will not produce such dependencies. */
 1548         samep = &JF(*samep);
 1549     }
 1550 #ifdef notdef
 1551     /* XXX This doesn't cover everything. */
 1552     for (i = 0; i < N_ATOMS; ++i)
 1553         if ((*samep)->val[i] != pred->val[i])
 1554             return;
 1555 #endif
 1556     /* Pull up the node. */
 1557     pull = *samep;
 1558     *samep = JF(pull);
 1559     JF(pull) = *diffp;
 1560 
 1561     /*
 1562      * At the top of the chain, each predecessor needs to point at the
 1563      * pulled up node.  Inside the chain, there is only one predecessor
 1564      * to worry about.
 1565      */
 1566     if (at_top)
 1567     {
 1568         for (ep = b->in_edges; ep != 0; ep = ep->next)
 1569         {
 1570             if (JT(ep->pred) == b)
 1571                 JT(ep->pred) = pull;
 1572             else
 1573                 JF(ep->pred) = pull;
 1574         }
 1575     }
 1576     else
 1577         *diffp = pull;
 1578 
 1579     done = 0;
 1580 }
 1581 
 1582 static void and_pullup(b)
 1583      struct block *b;
 1584 {
 1585     int val, at_top;
 1586     struct block *pull;
 1587     struct block **diffp, **samep;
 1588     struct edge *ep;
 1589 
 1590     ep = b->in_edges;
 1591     if (ep == 0)
 1592         return;
 1593 
 1594     /*
 1595      * Make sure each predecessor loads the same value.
 1596      */
 1597     val = ep->pred->val[A_ATOM];
 1598     for (ep = ep->next; ep != 0; ep = ep->next)
 1599         if (val != ep->pred->val[A_ATOM])
 1600             return;
 1601 
 1602     if (JT(b->in_edges->pred) == b)
 1603         diffp = &JT(b->in_edges->pred);
 1604     else
 1605         diffp = &JF(b->in_edges->pred);
 1606 
 1607     at_top = 1;
 1608     while (1)
 1609     {
 1610         if (*diffp == 0)
 1611             return;
 1612 
 1613         if (JF(*diffp) != JF(b))
 1614             return;
 1615 
 1616         if (!SET_MEMBER((*diffp)->dom, b->id))
 1617             return;
 1618 
 1619         if ((*diffp)->val[A_ATOM] != val)
 1620             break;
 1621 
 1622         diffp = &JT(*diffp);
 1623         at_top = 0;
 1624     }
 1625     samep = &JT(*diffp);
 1626     while (1)
 1627     {
 1628         if (*samep == 0)
 1629             return;
 1630 
 1631         if (JF(*samep) != JF(b))
 1632             return;
 1633 
 1634         if (!SET_MEMBER((*samep)->dom, b->id))
 1635             return;
 1636 
 1637         if ((*samep)->val[A_ATOM] == val)
 1638             break;
 1639 
 1640         /* XXX Need to check that there are no data dependencies
 1641            between diffp and samep.  Currently, the code generator
 1642            will not produce such dependencies. */
 1643         samep = &JT(*samep);
 1644     }
 1645 #ifdef notdef
 1646     /* XXX This doesn't cover everything. */
 1647     for (i = 0; i < N_ATOMS; ++i)
 1648         if ((*samep)->val[i] != pred->val[i])
 1649             return;
 1650 #endif
 1651     /* Pull up the node. */
 1652     pull = *samep;
 1653     *samep = JT(pull);
 1654     JT(pull) = *diffp;
 1655 
 1656     /*
 1657      * At the top of the chain, each predecessor needs to point at the
 1658      * pulled up node.  Inside the chain, there is only one predecessor
 1659      * to worry about.
 1660      */
 1661     if (at_top)
 1662     {
 1663         for (ep = b->in_edges; ep != 0; ep = ep->next)
 1664         {
 1665             if (JT(ep->pred) == b)
 1666                 JT(ep->pred) = pull;
 1667             else
 1668                 JF(ep->pred) = pull;
 1669         }
 1670     }
 1671     else
 1672         *diffp = pull;
 1673 
 1674     done = 0;
 1675 }
 1676 
 1677 static void opt_blks(root, do_stmts)
 1678      struct block *root;
 1679      int do_stmts;
 1680 {
 1681     int i, maxlevel;
 1682     struct block *p;
 1683 
 1684     init_val();
 1685     maxlevel = root->level;
 1686 
 1687     find_inedges(root);
 1688     for (i = maxlevel; i >= 0; --i)
 1689         for (p = levels[i]; p; p = p->link)
 1690             opt_blk(p, do_stmts);
 1691 
 1692     if (do_stmts)
 1693         /*
 1694          * No point trying to move branches; it can't possibly
 1695          * make a difference at this point.
 1696          */
 1697         return;
 1698 
 1699     for (i = 1; i <= maxlevel; ++i)
 1700     {
 1701         for (p = levels[i]; p; p = p->link)
 1702         {
 1703             opt_j(&p->et);
 1704             opt_j(&p->ef);
 1705         }
 1706     }
 1707 
 1708     find_inedges(root);
 1709     for (i = 1; i <= maxlevel; ++i)
 1710     {
 1711         for (p = levels[i]; p; p = p->link)
 1712         {
 1713             or_pullup(p);
 1714             and_pullup(p);
 1715         }
 1716     }
 1717 }
 1718 
 1719 static inline void link_inedge(parent, child)
 1720      struct edge *parent;
 1721      struct block *child;
 1722 {
 1723     parent->next = child->in_edges;
 1724     child->in_edges = parent;
 1725 }
 1726 
 1727 static void find_inedges(root)
 1728      struct block *root;
 1729 {
 1730     int i;
 1731     struct block *b;
 1732 
 1733     for (i = 0; i < n_blocks; ++i)
 1734         blocks[i]->in_edges = 0;
 1735 
 1736     /*
 1737      * Traverse the graph, adding each edge to the predecessor
 1738      * list of its successors.  Skip the leaves (i.e. level 0).
 1739      */
 1740     for (i = root->level; i > 0; --i)
 1741     {
 1742         for (b = levels[i]; b != 0; b = b->link)
 1743         {
 1744             link_inedge(&b->et, JT(b));
 1745             link_inedge(&b->ef, JF(b));
 1746         }
 1747     }
 1748 }
 1749 
 1750 static void opt_root(b)
 1751      struct block **b;
 1752 {
 1753     struct slist *tmp, *s;
 1754 
 1755     s = (*b)->stmts;
 1756     (*b)->stmts = 0;
 1757     while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
 1758         *b = JT(*b);
 1759 
 1760     tmp = (*b)->stmts;
 1761     if (tmp != 0)
 1762         sappend(s, tmp);
 1763     (*b)->stmts = s;
 1764 
 1765     /*
 1766      * If the root node is a return, then there is no
 1767      * point executing any statements (since the bpf machine
 1768      * has no side effects).
 1769      */
 1770     if (BPF_CLASS((*b)->s.code) == BPF_RET)
 1771         (*b)->stmts = 0;
 1772 }
 1773 
 1774 static void opt_loop(root, do_stmts)
 1775      struct block *root;
 1776      int do_stmts;
 1777 {
 1778 
 1779 #ifdef BDEBUG
 1780     if (dflag > 1)
 1781     {
 1782         printf("opt_loop(root, %d) begin\n", do_stmts);
 1783         opt_dump(root);
 1784     }
 1785 #endif
 1786     do
 1787     {
 1788         done = 1;
 1789         find_levels(root);
 1790         find_dom(root);
 1791         find_closure(root);
 1792         find_ud(root);
 1793         find_edom(root);
 1794         opt_blks(root, do_stmts);
 1795 #ifdef BDEBUG
 1796         if (dflag > 1)
 1797         {
 1798             printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
 1799             opt_dump(root);
 1800         }
 1801 #endif
 1802     } while (!done);
 1803 }
 1804 
 1805 /*
 1806  * Optimize the filter code in its dag representation.
 1807  */
 1808 void bpf_optimize(rootp)
 1809      struct block **rootp;
 1810 {
 1811     struct block *root;
 1812 
 1813     root = *rootp;
 1814 
 1815     opt_init(root);
 1816     opt_loop(root, 0);
 1817     opt_loop(root, 1);
 1818     intern_blocks(root);
 1819 #ifdef BDEBUG
 1820     if (dflag > 1)
 1821     {
 1822         printf("after intern_blocks()\n");
 1823         opt_dump(root);
 1824     }
 1825 #endif
 1826     opt_root(rootp);
 1827 #ifdef BDEBUG
 1828     if (dflag > 1)
 1829     {
 1830         printf("after opt_root()\n");
 1831         opt_dump(root);
 1832     }
 1833 #endif
 1834     opt_cleanup();
 1835 }
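/*
 * Usage sketch; the wrapper below is hypothetical and only illustrates
 * the expected calling order (optimize the DAG, then flatten it with
 * icode_to_fcode() further down in this file).  The real call sites
 * live in the code generator, not here.
 */
#if 0
static struct bpf_insn *optimize_and_flatten(struct block *root, int *lenp)
{
    bpf_optimize(&root);                /* may replace the root block */
    return icode_to_fcode(root, lenp);  /* flatten the DAG to a BPF array */
}
#endif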
 1836 
 1837 static void make_marks(p)
 1838      struct block *p;
 1839 {
 1840     if (!isMarked(p))
 1841     {
 1842         Mark(p);
 1843         if (BPF_CLASS(p->s.code) != BPF_RET)
 1844         {
 1845             make_marks(JT(p));
 1846             make_marks(JF(p));
 1847         }
 1848     }
 1849 }
 1850 
 1851 /*
 1852  * Mark the blocks reachable from 'p' so that isMarked() is true
 1853  * only for nodes that are still alive.
 1854  */
 1855 static void mark_code(p)
 1856      struct block *p;
 1857 {
 1858     cur_mark += 1;
 1859     make_marks(p);
 1860 }
 1861 
 1862 /*
 1863  * True iff the two stmt lists contain the same instructions once
 1864  * deleted (NOP) entries are skipped (they compute the same value).
 1865  */
 1866 static int eq_slist(x, y)
 1867      struct slist *x, *y;
 1868 {
 1869     while (1)
 1870     {
 1871         while (x && x->s.code == NOP)
 1872             x = x->next;
 1873         while (y && y->s.code == NOP)
 1874             y = y->next;
 1875         if (x == 0)
 1876             return y == 0;
 1877         if (y == 0)
 1878             return x == 0;
 1879         if (x->s.code != y->s.code || x->s.k != y->s.k)
 1880             return 0;
 1881         x = x->next;
 1882         y = y->next;
 1883     }
 1884 }
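/*
 * For example, two lists that differ only in deleted entries -- say
 * { LD, NOP, JEQ } versus { LD, JEQ } with matching codes and k
 * values -- compare equal here, because NOPs are skipped on both
 * sides before each pairwise comparison.
 */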
 1885 
 1886 static inline int eq_blk(b0, b1)
 1887      struct block *b0, *b1;
 1888 {
 1889     if (b0->s.code == b1->s.code &&
 1890         b0->s.k == b1->s.k && b0->et.succ == b1->et.succ && b0->ef.succ == b1->ef.succ)
 1891         return eq_slist(b0->stmts, b1->stmts);
 1892     return 0;
 1893 }
 1894 
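/*
 * Merge equivalent blocks: whenever two live blocks compare equal
 * under eq_blk(), the earlier one is linked to the later one and all
 * jump targets are redirected to the surviving copy.  The pass repeats
 * until a full sweep makes no change.
 */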
 1895 static void intern_blocks(root)
 1896      struct block *root;
 1897 {
 1898     struct block *p;
 1899     int i, j;
 1900     int done1;                  /* don't shadow global */
 1901   top:
 1902     done1 = 1;
 1903     for (i = 0; i < n_blocks; ++i)
 1904         blocks[i]->link = 0;
 1905 
 1906     mark_code(root);
 1907 
 1908     for (i = n_blocks - 1; --i >= 0;)
 1909     {
 1910         if (!isMarked(blocks[i]))
 1911             continue;
 1912         for (j = i + 1; j < n_blocks; ++j)
 1913         {
 1914             if (!isMarked(blocks[j]))
 1915                 continue;
 1916             if (eq_blk(blocks[i], blocks[j]))
 1917             {
 1918                 blocks[i]->link = blocks[j]->link ? blocks[j]->link : blocks[j];
 1919                 break;
 1920             }
 1921         }
 1922     }
 1923     for (i = 0; i < n_blocks; ++i)
 1924     {
 1925         p = blocks[i];
 1926         if (JT(p) == 0)
 1927             continue;
 1928         if (JT(p)->link)
 1929         {
 1930             done1 = 0;
 1931             JT(p) = JT(p)->link;
 1932         }
 1933         if (JF(p)->link)
 1934         {
 1935             done1 = 0;
 1936             JF(p) = JF(p)->link;
 1937         }
 1938     }
 1939     if (!done1)
 1940         goto top;
 1941 }
 1942 
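/*
 * Release the working storage that opt_init() allocated.
 */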
 1943 static void opt_cleanup()
 1944 {
 1945     free((void *) vnode_base);
 1946     free((void *) vmap);
 1947     free((void *) edges);
 1948     free((void *) space);
 1949     free((void *) levels);
 1950     free((void *) blocks);
 1951 }
 1952 
 1953 /*
 1954  * Return the number of stmts in 's'.
 1955  */
 1956 static int slength(s)
 1957      struct slist *s;
 1958 {
 1959     int n = 0;
 1960 
 1961     for (; s; s = s->next)
 1962         if (s->s.code != NOP)
 1963             ++n;
 1964     return n;
 1965 }
 1966 
 1967 /*
 1968  * Return the number of nodes reachable by 'p'.
 1969  * All nodes should be initially unmarked.
 1970  */
 1971 static int count_blocks(p)
 1972      struct block *p;
 1973 {
 1974     if (p == 0 || isMarked(p))
 1975         return 0;
 1976     Mark(p);
 1977     return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
 1978 }
 1979 
 1980 /*
 1981  * Do a depth first search on the flow graph, numbering the
 1982  * basic blocks and entering them into the 'blocks' array.
 1983  */
 1984 static void number_blks_r(p)
 1985      struct block *p;
 1986 {
 1987     int n;
 1988 
 1989     if (p == 0 || isMarked(p))
 1990         return;
 1991 
 1992     Mark(p);
 1993     n = n_blocks++;
 1994     p->id = n;
 1995     blocks[n] = p;
 1996 
 1997     number_blks_r(JT(p));
 1998     number_blks_r(JF(p));
 1999 }
 2000 
 2001 /*
 2002  * Return the number of stmts in the flowgraph reachable by 'p'.
 2003  * The nodes should be unmarked before calling.
 2004  *
 2005  * Note that "stmts" means "instructions", and that this includes
 2006  *
 2007  *  side-effect statements in 'p' (slength(p->stmts));
 2008  *
 2009  *  statements in the true branch from 'p' (count_stmts(JT(p)));
 2010  *
 2011  *  statements in the false branch from 'p' (count_stmts(JF(p)));
 2012  *
 2013  *  the conditional jump itself (1);
 2014  *
 2015  *  an extra long jump if the true branch requires it (p->longjt);
 2016  *
 2017  *  an extra long jump if the false branch requires it (p->longjf).
 2018  */
 2019 static int count_stmts(p)
 2020      struct block *p;
 2021 {
 2022     int n;
 2023 
 2024     if (p == 0 || isMarked(p))
 2025         return 0;
 2026     Mark(p);
 2027     n = count_stmts(JT(p)) + count_stmts(JF(p));
 2028     return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
 2029 }
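/*
 * Worked example with made-up numbers: a block carrying three non-NOP
 * statements whose true branch needs one extra long jump contributes
 * slength() + 1 + longjt + longjf = 3 + 1 + 1 + 0 = 5 instructions,
 * on top of whatever its two successors contribute.
 */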
 2030 
 2031 /*
 2032  * Allocate memory.  All allocation is done before optimization
 2033  * is begun.  A linear bound on the size of all data structures is computed
 2034  * from the total number of blocks and/or statements.
 2035  */
 2036 static void opt_init(root)
 2037      struct block *root;
 2038 {
 2039     bpf_u_int32 *p;
 2040     int i, n, max_stmts;
 2041 
 2042     /*
 2043      * First, count the blocks, so we can malloc an array to map
 2044      * block number to block.  Then, put the blocks into the array.
 2045      */
 2046     unMarkAll();
 2047     n = count_blocks(root);
 2048     blocks = (struct block **) calloc(n, sizeof(*blocks));
 2049     if (blocks == NULL)
 2050         bpf_error("malloc");
 2051     unMarkAll();
 2052     n_blocks = 0;
 2053     number_blks_r(root);
 2054 
 2055     n_edges = 2 * n_blocks;
 2056     edges = (struct edge **) calloc(n_edges, sizeof(*edges));
 2057     if (edges == NULL)
 2058         bpf_error("malloc");
 2059 
 2060     /*
 2061      * The number of levels is bounded by the number of nodes.
 2062      */
 2063     levels = (struct block **) calloc(n_blocks, sizeof(*levels));
 2064     if (levels == NULL)
 2065         bpf_error("malloc");
 2066 
 2067     edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
 2068     nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
 2069 
 2070     /* XXX */
 2071     space = (bpf_u_int32 *) malloc(2 * n_blocks * nodewords * sizeof(*space)
 2072                                    + n_edges * edgewords * sizeof(*space));
 2073     if (space == NULL)
 2074         bpf_error("malloc");
 2075     p = space;
 2076     all_dom_sets = p;
 2077     for (i = 0; i < n; ++i)
 2078     {
 2079         blocks[i]->dom = p;
 2080         p += nodewords;
 2081     }
 2082     all_closure_sets = p;
 2083     for (i = 0; i < n; ++i)
 2084     {
 2085         blocks[i]->closure = p;
 2086         p += nodewords;
 2087     }
 2088     all_edge_sets = p;
 2089     for (i = 0; i < n; ++i)
 2090     {
 2091         register struct block *b = blocks[i];
 2092 
 2093         b->et.edom = p;
 2094         p += edgewords;
 2095         b->ef.edom = p;
 2096         p += edgewords;
 2097         b->et.id = i;
 2098         edges[i] = &b->et;
 2099         b->ef.id = n_blocks + i;
 2100         edges[n_blocks + i] = &b->ef;
 2101         b->et.pred = b;
 2102         b->ef.pred = b;
 2103     }
 2104     max_stmts = 0;
 2105     for (i = 0; i < n; ++i)
 2106         max_stmts += slength(blocks[i]->stmts) + 1;
 2107     /*
 2108      * We allocate at most 3 value numbers per statement,
 2109      * so this is an upper bound on the number of valnodes
 2110      * we'll need.
 2111      */
 2112     maxval = 3 * max_stmts;
 2113     vmap = (struct vmapinfo *) calloc(maxval, sizeof(*vmap));
 2114     vnode_base = (struct valnode *) calloc(maxval, sizeof(*vnode_base));
 2115     if (vmap == NULL || vnode_base == NULL)
 2116         bpf_error("malloc");
 2117 }
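/*
 * Sizing example with made-up numbers: for n_blocks = 40 and a 32-bit
 * bpf_u_int32, nodewords = 40/32 + 1 = 2 and, with n_edges = 80,
 * edgewords = 80/32 + 1 = 3.  The single 'space' allocation above then
 * holds 2*40*2 + 80*3 = 400 words, carved up by the loops above into
 * the per-block dom and closure sets and the per-edge edom sets.
 */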
 2118 
 2119 /*
 2120  * Some pointers used to convert the basic block form of the code,
 2121  * into the array form that BPF requires.  'fstart' will point to
 2122  * the malloc'd array while 'ftail' is used during the recursive traversal.
 2123  */
 2124 static struct bpf_insn *fstart;
 2125 static struct bpf_insn *ftail;
 2126 
 2127 #ifdef BDEBUG
 2128 int bids[1000];
 2129 #endif
 2130 
 2131 /*
 2132  * Returns true if successful.  Returns false if a branch has
 2133  * an offset that is too large.  If so, we have marked that
 2134  * branch so that on a subsequent iteration, it will be treated
 2135  * properly.
 2136  */
 2137 static int convert_code_r(p)
 2138      struct block *p;
 2139 {
 2140     struct bpf_insn *dst;
 2141     struct slist *src;
 2142     int slen;
 2143     u_int off;
 2144     int extrajmps;              /* number of extra jumps inserted */
 2145     struct slist **offset = NULL;
 2146 
 2147     if (p == 0 || isMarked(p))
 2148         return (1);
 2149     Mark(p);
 2150 
 2151     if (convert_code_r(JF(p)) == 0)
 2152         return (0);
 2153     if (convert_code_r(JT(p)) == 0)
 2154         return (0);
 2155 
 2156     slen = slength(p->stmts);
 2157     dst = ftail -= (slen + 1 + p->longjt + p->longjf);
 2158     /* inflate length by any extra jumps */
 2159 
 2160     p->offset = dst - fstart;
 2161 
 2162     /* generate offset[] for convenience  */
 2163     if (slen)
 2164     {
 2165         offset = (struct slist **) calloc(slen, sizeof(struct slist *));
 2166         if (!offset)
 2167         {
 2168             bpf_error("not enough core");
 2169          /*NOTREACHED*/}
 2170     }
 2171     src = p->stmts;
 2172     for (off = 0; off < slen && src; off++)
 2173     {
 2174 #if 0
 2175         printf("off=%d src=%x\n", off, src);
 2176 #endif
 2177         offset[off] = src;
 2178         src = src->next;
 2179     }
 2180 
 2181     off = 0;
 2182     for (src = p->stmts; src; src = src->next)
 2183     {
 2184         if (src->s.code == NOP)
 2185             continue;
 2186         dst->code = (u_short) src->s.code;
 2187         dst->k = src->s.k;
 2188 
 2189         /* fill block-local relative jump */
 2190         if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP | BPF_JA))
 2191         {
 2192 #if 0
 2193             if (src->s.jt || src->s.jf)
 2194             {
 2195                 bpf_error("illegal jmp destination");
 2196              /*NOTREACHED*/}
 2197 #endif
 2198             goto filled;
 2199         }
 2200         if (off == slen - 2)    /*??? */
 2201             goto filled;
 2202 
 2203         {
 2204             int i;
 2205             int jt, jf;
 2206             const char *ljerr = "%s for block-local relative jump: off=%d";
 2207 
 2208 #if 0
 2209             printf("code=%x off=%d %x %x\n", src->s.code, off, src->s.jt, src->s.jf);
 2210 #endif
 2211 
 2212             if (!src->s.jt || !src->s.jf)
 2213             {
 2214                 bpf_error(ljerr, "no jmp destination", off);
 2215              /*NOTREACHED*/}
 2216 
 2217             jt = jf = 0;
 2218             for (i = 0; i < slen; i++)
 2219             {
 2220                 if (offset[i] == src->s.jt)
 2221                 {
 2222                     if (jt)
 2223                     {
 2224                         bpf_error(ljerr, "multiple matches", off);
 2225                      /*NOTREACHED*/}
 2226 
 2227                     dst->jt = i - off - 1;
 2228                     jt++;
 2229                 }
 2230                 if (offset[i] == src->s.jf)
 2231                 {
 2232                     if (jf)
 2233                     {
 2234                         bpf_error(ljerr, "multiple matches", off);
 2235                      /*NOTREACHED*/}
 2236                     dst->jf = i - off - 1;
 2237                     jf++;
 2238                 }
 2239             }
 2240             if (!jt || !jf)
 2241             {
 2242                 bpf_error(ljerr, "no destination found", off);
 2243              /*NOTREACHED*/}
 2244         }
 2245       filled:
 2246         ++dst;
 2247         ++off;
 2248     }
 2249     if (offset)
 2250         free(offset);
 2251 
 2252 #ifdef BDEBUG
 2253     bids[dst - fstart] = p->id + 1;
 2254 #endif
 2255     dst->code = (u_short) p->s.code;
 2256     dst->k = p->s.k;
 2257     if (JT(p))
 2258     {
 2259         extrajmps = 0;
 2260         off = JT(p)->offset - (p->offset + slen) - 1;
 2261         if (off >= 256)
 2262         {
 2263             /* offset too large for branch, must add a jump */
 2264             if (p->longjt == 0)
 2265             {
 2266                 /* mark this instruction and retry */
 2267                 p->longjt++;
 2268                 return (0);
 2269             }
 2270             /* branch if T to following jump */
 2271             dst->jt = extrajmps;
 2272             extrajmps++;
 2273             dst[extrajmps].code = BPF_JMP | BPF_JA;
 2274             dst[extrajmps].k = off - extrajmps;
 2275         }
 2276         else
 2277             dst->jt = off;
 2278         off = JF(p)->offset - (p->offset + slen) - 1;
 2279         if (off >= 256)
 2280         {
 2281             /* offset too large for branch, must add a jump */
 2282             if (p->longjf == 0)
 2283             {
 2284                 /* mark this instruction and retry */
 2285                 p->longjf++;
 2286                 return (0);
 2287             }
 2288             /* branch if F to following jump */
 2289             /* if two jumps are inserted, F goes to second one */
 2290             dst->jf = extrajmps;
 2291             extrajmps++;
 2292             dst[extrajmps].code = BPF_JMP | BPF_JA;
 2293             dst[extrajmps].k = off - extrajmps;
 2294         }
 2295         else
 2296             dst->jf = off;
 2297     }
 2298     return (1);
 2299 }
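/*
 * Shape of the long-jump rewrite above, with made-up offsets: a
 * conditional branch whose true target lies 300 instructions ahead
 * cannot fit in the 8-bit jt field, so after the retry the pair is
 * emitted roughly as
 *
 *      jeq ...    jt 0  jf ...     (true case falls into the next slot)
 *      ja  299                     (the 32-bit k carries the real distance)
 *
 * which is why count_stmts() charges one extra slot per longjt/longjf.
 */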
 2300 
 2301 
 2302 /*
 2303  * Convert flowgraph intermediate representation to the
 2304  * BPF array representation.  Set *lenp to the number of instructions.
 2305  *
 2306  * This routine does *NOT* leak the memory pointed to by fp.  It *must
 2307  * not* do free(fp) before returning fp; doing so would make no sense,
 2308  * as the BPF array pointed to by the return value of icode_to_fcode()
 2309  * must be valid - it's being returned for use in a bpf_program structure.
 2310  *
 2311  * If it appears that icode_to_fcode() is leaking, the problem is that
 2312  * the program using pcap_compile() is failing to free the memory in
 2313  * the BPF program when it's done - the leak is in the program, not in
 2314  * the routine that happens to be allocating the memory.  (By analogy, if
 2315  * a program calls fopen() without ever calling fclose() on the FILE *,
 2316  * it will leak the FILE structure; the leak is not in fopen(), it's in
 2317  * the program.)  Change the program to use pcap_freecode() when it's
 2318  * done with the filter program.  See the pcap man page.
 2319  */
 2320 struct bpf_insn *icode_to_fcode(root, lenp)
 2321      struct block *root;
 2322      int *lenp;
 2323 {
 2324     int n;
 2325     struct bpf_insn *fp;
 2326 
 2327     /*
 2328      * Loop doing convert_code_r() until no branches remain
 2329      * with too-large offsets.
 2330      */
 2331     while (1)
 2332     {
 2333         unMarkAll();
 2334         n = *lenp = count_stmts(root);
 2335 
 2336         fp = (struct bpf_insn *) malloc(sizeof(*fp) * n);
 2337         if (fp == NULL)
 2338             bpf_error("malloc");
 2339         memset((char *) fp, 0, sizeof(*fp) * n);
 2340         fstart = fp;
 2341         ftail = fp + n;
 2342 
 2343         unMarkAll();
 2344         if (convert_code_r(root))
 2345             break;
 2346         free(fp);
 2347     }
 2348 
 2349     return fp;
 2350 }
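/*
 * Ownership sketch (the function below is a hypothetical caller,
 * mirroring what opt_dump() does under BDEBUG): the returned array
 * belongs to the caller and must stay valid while the bpf_program is
 * in use; free it only when the filter is no longer needed.
 */
#if 0
static void run_filter_example(struct block *root)
{
    struct bpf_program f;
    int len;

    f.bf_insns = icode_to_fcode(root, &len);
    f.bf_len = len;
    /* ... hand 'f' to whatever installs or evaluates the filter ... */
    free((char *) f.bf_insns);          /* caller owns the array */
}
#endif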
 2351 
 2352 #ifdef BDEBUG
 2353 static void opt_dump(root)
 2354      struct block *root;
 2355 {
 2356     struct bpf_program f;
 2357 
 2358     memset(bids, 0, sizeof bids);
 2359     f.bf_insns = icode_to_fcode(root, &f.bf_len);
 2360     bpf_dump(&f, 1);
 2361     putchar('\n');
 2362     free((char *) f.bf_insns);
 2363 }
 2364 #endif