"Fossies" - the Fresh Open Source Software Archive

Member "regex2.h" (25 Nov 2004, 5365 Bytes) of package /linux/privat/old/dirsync-1_11.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "regex2.h", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * First, the stuff that ends up in the outside-world include file
    3  = typedef off_t regoff_t;
    4  = typedef struct {
    5  =  int re_magic;
    6  =  size_t re_nsub;     // number of parenthesized subexpressions
    7  =  const char *re_endp;    // end pointer for REG_PEND
    8  =  struct re_guts *re_g;   // none of your business :-)
    9  = } regex_t;
   10  = typedef struct {
   11  =  regoff_t rm_so;     // start of match
   12  =  regoff_t rm_eo;     // end of match
   13  = } regmatch_t;
   14  */
   15 /*
   16  * internals of regex_t
   17  */
   18 #define MAGIC1  ((('r'^0200)<<8) | 'e')
   19 
   20 /*
   21  * The internal representation is a *strip*, a sequence of
   22  * operators ending with an endmarker.  (Some terminology etc. is a
   23  * historical relic of earlier versions which used multiple strips.)
   24  * Certain oddities in the representation are there to permit running
   25  * the machinery backwards; in particular, any deviation from sequential
   26  * flow must be marked at both its source and its destination.  Some
   27  * fine points:
   28  *
   29  * - OPLUS_ and O_PLUS are *inside* the loop they create.
   30  * - OQUEST_ and O_QUEST are *outside* the bypass they create.
   31  * - OCH_ and O_CH are *outside* the multi-way branch they create, while
   32  *   OOR1 and OOR2 are respectively the end and the beginning of one of
   33  *   the branches.  Note that there is an implicit OOR2 following OCH_
   34  *   and an implicit OOR1 preceding O_CH.
   35  *
   36  * In state representations, an operator's bit is on to signify a state
   37  * immediately *preceding* "execution" of that operator.
   38  */
   39 typedef long sop;       /* strip operator */
   40 typedef long sopno;
   41 #define OPRMASK 0x7c000000
   42 #define OPDMASK 0x03ffffff
   43 #define OPSHIFT (26)
   44 #define OP(n)   ((n)&OPRMASK)
   45 #define OPND(n) ((n)&OPDMASK)
   46 #define SOP(op, opnd)   ((op)|(opnd))
   47 /* operators               meaning  operand         */
   48 /*                      (back, fwd are offsets) */
   49 #define OEND    (1<<OPSHIFT)    /* endmarker    -           */
   50 #define OCHAR   (2<<OPSHIFT)    /* character    unsigned char       */
   51 #define OBOL    (3<<OPSHIFT)    /* left anchor  -           */
   52 #define OEOL    (4<<OPSHIFT)    /* right anchor -           */
   53 #define OANY    (5<<OPSHIFT)    /* .        -           */
   54 #define OANYOF  (6<<OPSHIFT)    /* [...]    set number      */
   55 #define OBACK_  (7<<OPSHIFT)    /* begin \d paren number        */
   56 #define O_BACK  (8<<OPSHIFT)    /* end \d   paren number        */
   57 #define OPLUS_  (9<<OPSHIFT)    /* + prefix fwd to suffix       */
   58 #define O_PLUS  (10<<OPSHIFT)   /* + suffix back to prefix      */
   59 #define OQUEST_ (11<<OPSHIFT)   /* ? prefix fwd to suffix       */
   60 #define O_QUEST (12<<OPSHIFT)   /* ? suffix back to prefix      */
   61 #define OLPAREN (13<<OPSHIFT)   /* (        fwd to )        */
   62 #define ORPAREN (14<<OPSHIFT)   /* )        back to (       */
   63 #define OCH_    (15<<OPSHIFT)   /* begin choice fwd to OOR2     */
   64 #define OOR1    (16<<OPSHIFT)   /* | pt. 1  back to OOR1 or OCH_    */
   65 #define OOR2    (17<<OPSHIFT)   /* | pt. 2  fwd to OOR2 or O_CH */
   66 #define O_CH    (18<<OPSHIFT)   /* end choice   back to OOR1        */
   67 #define OBOW    (19<<OPSHIFT)   /* begin word   -           */
   68 #define OEOW    (20<<OPSHIFT)   /* end word -           */
   69 
   70 /*
   71  * Structure for [] character-set representation.  Character sets are
   72  * done as bit vectors, grouped 8 to a byte vector for compactness.
   73  * The individual set therefore has both a pointer to the byte vector
   74  * and a mask to pick out the relevant bit of each byte.  A hash code
   75  * simplifies testing whether two sets could be identical.
   76  *
   77  * This will get trickier for multicharacter collating elements.  As
   78  * preliminary hooks for dealing with such things, we also carry along
   79  * a string of multi-character elements, and decide the size of the
   80  * vectors at run time.
   81  */
   82 typedef struct {
   83     uch *ptr;       /* -> uch [csetsize] */
   84     uch mask;       /* bit within array */
   85     uch hash;       /* hash code */
   86     size_t smultis;
   87     char *multis;       /* -> char[smulti]  ab\0cd\0ef\0\0 */
   88 } cset;
   89 /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
   90 #define CHadd(cs, c)    ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
   91 #define CHsub(cs, c)    ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
   92 #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
   93 #define MCadd(p, cs, cp)    mcadd(p, cs, cp)    /* regcomp() internal fns */
   94 #define MCsub(p, cs, cp)    mcsub(p, cs, cp)
   95 #define MCin(p, cs, cp) mcin(p, cs, cp)
   96 
   97 /* stuff for character categories */
   98 typedef unsigned char cat_t;
   99 
  100 /*
  101  * main compiled-expression structure
  102  */
  103 struct re_guts {
  104     int magic;
  105 #       define  MAGIC2  ((('R'^0200)<<8)|'E')
  106     sop *strip;     /* malloced area for strip */
  107     int csetsize;       /* number of bits in a cset vector */
  108     int ncsets;     /* number of csets in use */
  109     cset *sets;     /* -> cset [ncsets] */
  110     uch *setbits;       /* -> uch[csetsize][ncsets/CHAR_BIT] */
  111     int cflags;     /* copy of regcomp() cflags argument */
  112     sopno nstates;      /* = number of sops */
  113     sopno firststate;   /* the initial OEND (normally 0) */
  114     sopno laststate;    /* the final OEND */
  115     int iflags;     /* internal flags */
  116 #       define  USEBOL  01  /* used ^ */
  117 #       define  USEEOL  02  /* used $ */
  118 #       define  BAD 04  /* something wrong */
  119     int nbol;       /* number of ^ used */
  120     int neol;       /* number of $ used */
  121     int ncategories;    /* how many character categories */
  122     cat_t *categories;  /* ->catspace[-CHAR_MIN] */
  123     char *must;     /* match must contain this string */
  124     int mlen;       /* length of must */
  125     size_t nsub;        /* copy of re_nsub */
  126     int backrefs;       /* does it use back references? */
  127     sopno nplus;        /* how deep does it nest +s? */
  128     /* catspace must be last */
  129     cat_t catspace[1];  /* actually [NC] */
  130 };
  131 
  132 /* misc utilities */
  133 #define OUT (CHAR_MAX+1)    /* a non-character value */
  134 #define ISWORD(c)   (isalnum(c) || (c) == '_')