"Fossies" - the Fresh Open Source Software Archive

Member "engine.c" (25 Nov 2004, 25546 Bytes) of package /linux/privat/old/dirsync-1_11.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "engine.c" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * The matching engine and friends.  This file is #included by regexec.c
    3  * after suitable #defines of a variety of macros used herein, so that
    4  * different state representations can be used without duplicating masses
    5  * of code.
    6  */
    7 
    8 #ifdef SNAMES
    9 #define matcher smatcher
   10 #define fast    sfast
   11 #define slow    sslow
   12 #define dissect sdissect
   13 #define backref sbackref
   14 #define step    sstep
   15 #define print   sprint
   16 #define at  sat
   17 #define match   smat
   18 #endif
   19 #ifdef LNAMES
   20 #define matcher lmatcher
   21 #define fast    lfast
   22 #define slow    lslow
   23 #define dissect ldissect
   24 #define backref lbackref
   25 #define step    lstep
   26 #define print   lprint
   27 #define at  lat
   28 #define match   lmat
   29 #endif
   30 
   31 /* another structure passed up and down to avoid zillions of parameters */
   32 struct match {
   33     struct re_guts *g;
   34     int eflags;
   35     regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
   36     char *offp;     /* offsets work from here */
   37     char *beginp;       /* start of string -- virtual NUL precedes */
   38     char *endp;     /* end of string -- virtual NUL here */
   39     char *coldp;        /* can be no match starting before here */
   40     char **lastpos;     /* [nplus+1] */
   41     STATEVARS;
   42     states st;      /* current states */
   43     states fresh;       /* states for a fresh start */
   44     states tmp;     /* temporary */
   45     states empty;       /* empty set of states */
   46 };
   47 
   48 #include "engine.ih"
   49 
   50 #ifdef REDEBUG
   51 #define SP(t, s, c) print(m, t, s, c, stdout)
   52 #define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
   53 #define NOTE(str)   { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
   54 #else
   55 #define SP(t, s, c) /* nothing */
   56 #define AT(t, p1, p2, s1, s2)   /* nothing */
   57 #define NOTE(s) /* nothing */
   58 #endif
   59 
   60 /*
   61  - matcher - the actual matching engine
   62  == static int matcher(register struct re_guts *g, char *string, \
   63  == size_t nmatch, regmatch_t pmatch[], int eflags);
   64  */
   65 static int          /* 0 success, REG_NOMATCH failure */
   66 matcher(g, string, nmatch, pmatch, eflags)
   67 register struct re_guts *g;
   68 char *string;
   69 size_t nmatch;
   70 regmatch_t pmatch[];
   71 int eflags;
   72 {
   73     register char *endp;
   74     register int i;
   75     struct match mv;
   76     register struct match *m = &mv;
   77     register char *dp;
   78     const register sopno gf = g->firststate+1;  /* +1 for OEND */
   79     const register sopno gl = g->laststate;
   80     char *start;
   81     char *stop;
   82 
   83     /* simplify the situation where possible */
   84     if (g->cflags&REG_NOSUB)
   85         nmatch = 0;
   86     if (eflags&REG_STARTEND) {
   87         start = string + pmatch[0].rm_so;
   88         stop = string + pmatch[0].rm_eo;
   89     } else {
   90         start = string;
   91         stop = start + strlen(start);
   92     }
   93     if (stop < start)
   94         return(REG_INVARG);
   95 
   96     /* prescreening; this does wonders for this rather slow code */
   97     if (g->must != NULL) {
   98         for (dp = start; dp < stop; dp++)
   99             if (*dp == g->must[0] && stop - dp >= g->mlen &&
  100                 memcmp(dp, g->must, (size_t)g->mlen) == 0)
  101                 break;
  102         if (dp == stop)     /* we didn't find g->must */
  103             return(REG_NOMATCH);
  104     }
  105 
  106     /* match struct setup */
  107     m->g = g;
  108     m->eflags = eflags;
  109     m->pmatch = NULL;
  110     m->lastpos = NULL;
  111     m->offp = string;
  112     m->beginp = start;
  113     m->endp = stop;
  114     STATESETUP(m, 4);
  115     SETUP(m->st);
  116     SETUP(m->fresh);
  117     SETUP(m->tmp);
  118     SETUP(m->empty);
  119     CLEAR(m->empty);
  120 
  121     /* this loop does only one repetition except for backrefs */
  122     for (;;) {
  123         endp = fast(m, start, stop, gf, gl);
  124         if (endp == NULL) {     /* a miss */
  125             STATETEARDOWN(m);
  126             return(REG_NOMATCH);
  127         }
  128         if (nmatch == 0 && !g->backrefs)
  129             break;      /* no further info needed */
  130 
  131         /* where? */
  132         assert(m->coldp != NULL);
  133         for (;;) {
  134             NOTE("finding start");
  135             endp = slow(m, m->coldp, stop, gf, gl);
  136             if (endp != NULL)
  137                 break;
  138             assert(m->coldp < m->endp);
  139             m->coldp++;
  140         }
  141         if (nmatch == 1 && !g->backrefs)
  142             break;      /* no further info needed */
  143 
  144         /* oh my, he wants the subexpressions... */
  145         if (m->pmatch == NULL)
  146             m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
  147                             sizeof(regmatch_t));
  148         if (m->pmatch == NULL) {
  149             STATETEARDOWN(m);
  150             return(REG_ESPACE);
  151         }
  152         for (i = 1; i <= m->g->nsub; i++)
  153             m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
  154         if (!g->backrefs && !(m->eflags&REG_BACKR)) {
  155             NOTE("dissecting");
  156             dp = dissect(m, m->coldp, endp, gf, gl);
  157         } else {
  158             if (g->nplus > 0 && m->lastpos == NULL)
  159                 m->lastpos = (char **)malloc((g->nplus+1) *
  160                             sizeof(char *));
  161             if (g->nplus > 0 && m->lastpos == NULL) {
  162                 free(m->pmatch);
  163                 STATETEARDOWN(m);
  164                 return(REG_ESPACE);
  165             }
  166             NOTE("backref dissect");
  167             dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
  168         }
  169         if (dp != NULL)
  170             break;
  171 
  172         /* uh-oh... we couldn't find a subexpression-level match */
  173         assert(g->backrefs);    /* must be back references doing it */
  174         assert(g->nplus == 0 || m->lastpos != NULL);
  175         for (;;) {
  176             if (dp != NULL || endp <= m->coldp)
  177                 break;      /* defeat */
  178             NOTE("backoff");
  179             endp = slow(m, m->coldp, endp-1, gf, gl);
  180             if (endp == NULL)
  181                 break;      /* defeat */
  182             /* try it on a shorter possibility */
  183 #ifndef NDEBUG
  184             for (i = 1; i <= m->g->nsub; i++) {
  185                 assert(m->pmatch[i].rm_so == -1);
  186                 assert(m->pmatch[i].rm_eo == -1);
  187             }
  188 #endif
  189             NOTE("backoff dissect");
  190             dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
  191         }
  192         assert(dp == NULL || dp == endp);
  193         if (dp != NULL)     /* found a shorter one */
  194             break;
  195 
  196         /* despite initial appearances, there is no match here */
  197         NOTE("false alarm");
  198         start = m->coldp + 1;   /* recycle starting later */
  199         assert(start <= stop);
  200     }
  201 
  202     /* fill in the details if requested */
  203     if (nmatch > 0) {
  204         pmatch[0].rm_so = m->coldp - m->offp;
  205         pmatch[0].rm_eo = endp - m->offp;
  206     }
  207     if (nmatch > 1) {
  208         assert(m->pmatch != NULL);
  209         for (i = 1; i < nmatch; i++)
  210             if (i <= m->g->nsub)
  211                 pmatch[i] = m->pmatch[i];
  212             else {
  213                 pmatch[i].rm_so = -1;
  214                 pmatch[i].rm_eo = -1;
  215             }
  216     }
  217 
  218     if (m->pmatch != NULL)
  219         free((char *)m->pmatch);
  220     if (m->lastpos != NULL)
  221         free((char *)m->lastpos);
  222     STATETEARDOWN(m);
  223     return(0);
  224 }
  225 
  226 /*
  227  - dissect - figure out what matched what, no back references
  228  == static char *dissect(register struct match *m, char *start, \
  229  == char *stop, sopno startst, sopno stopst);
  230  */
  231 static char *           /* == stop (success) always */
  232 dissect(m, start, stop, startst, stopst)
  233 register struct match *m;
  234 char *start;
  235 char *stop;
  236 sopno startst;
  237 sopno stopst;
  238 {
  239     register int i;
  240     register sopno ss;  /* start sop of current subRE */
  241     register sopno es;  /* end sop of current subRE */
  242     register char *sp;  /* start of string matched by it */
  243     register char *stp; /* string matched by it cannot pass here */
  244     register char *rest;    /* start of rest of string */
  245     register char *tail;    /* string unmatched by rest of RE */
  246     register sopno ssub;    /* start sop of subsubRE */
  247     register sopno esub;    /* end sop of subsubRE */
  248     register char *ssp; /* start of string matched by subsubRE */
  249     register char *sep; /* end of string matched by subsubRE */
  250     register char *oldssp;  /* previous ssp */
  251     register char *dp;
  252 
  253     AT("diss", start, stop, startst, stopst);
  254     sp = start;
  255     for (ss = startst; ss < stopst; ss = es) {
  256         /* identify end of subRE */
  257         es = ss;
  258         switch (OP(m->g->strip[es])) {
  259         case OPLUS_:
  260         case OQUEST_:
  261             es += OPND(m->g->strip[es]);
  262             break;
  263         case OCH_:
  264             while (OP(m->g->strip[es]) != O_CH)
  265                 es += OPND(m->g->strip[es]);
  266             break;
  267         }
  268         es++;
  269 
  270         /* figure out what it matched */
  271         switch (OP(m->g->strip[ss])) {
  272         case OEND:
  273             assert(nope);
  274             break;
  275         case OCHAR:
  276             sp++;
  277             break;
  278         case OBOL:
  279         case OEOL:
  280         case OBOW:
  281         case OEOW:
  282             break;
  283         case OANY:
  284         case OANYOF:
  285             sp++;
  286             break;
  287         case OBACK_:
  288         case O_BACK:
  289             assert(nope);
  290             break;
  291         /* cases where length of match is hard to find */
  292         case OQUEST_:
  293             stp = stop;
  294             for (;;) {
  295                 /* how long could this one be? */
  296                 rest = slow(m, sp, stp, ss, es);
  297                 assert(rest != NULL);   /* it did match */
  298                 /* could the rest match the rest? */
  299                 tail = slow(m, rest, stop, es, stopst);
  300                 if (tail == stop)
  301                     break;      /* yes! */
  302                 /* no -- try a shorter match for this one */
  303                 stp = rest - 1;
  304                 assert(stp >= sp);  /* it did work */
  305             }
  306             ssub = ss + 1;
  307             esub = es - 1;
  308             /* did innards match? */
  309             if (slow(m, sp, rest, ssub, esub) != NULL) {
  310                 dp = dissect(m, sp, rest, ssub, esub);
  311                 assert(dp == rest);
  312             } else      /* no */
  313                 assert(sp == rest);
  314             sp = rest;
  315             break;
  316         case OPLUS_:
  317             stp = stop;
  318             for (;;) {
  319                 /* how long could this one be? */
  320                 rest = slow(m, sp, stp, ss, es);
  321                 assert(rest != NULL);   /* it did match */
  322                 /* could the rest match the rest? */
  323                 tail = slow(m, rest, stop, es, stopst);
  324                 if (tail == stop)
  325                     break;      /* yes! */
  326                 /* no -- try a shorter match for this one */
  327                 stp = rest - 1;
  328                 assert(stp >= sp);  /* it did work */
  329             }
  330             ssub = ss + 1;
  331             esub = es - 1;
  332             ssp = sp;
  333             oldssp = ssp;
  334             for (;;) {  /* find last match of innards */
  335                 sep = slow(m, ssp, rest, ssub, esub);
  336                 if (sep == NULL || sep == ssp)
  337                     break;  /* failed or matched null */
  338                 oldssp = ssp;   /* on to next try */
  339                 ssp = sep;
  340             }
  341             if (sep == NULL) {
  342                 /* last successful match */
  343                 sep = ssp;
  344                 ssp = oldssp;
  345             }
  346             assert(sep == rest);    /* must exhaust substring */
  347             assert(slow(m, ssp, sep, ssub, esub) == rest);
  348             dp = dissect(m, ssp, sep, ssub, esub);
  349             assert(dp == sep);
  350             sp = rest;
  351             break;
  352         case OCH_:
  353             stp = stop;
  354             for (;;) {
  355                 /* how long could this one be? */
  356                 rest = slow(m, sp, stp, ss, es);
  357                 assert(rest != NULL);   /* it did match */
  358                 /* could the rest match the rest? */
  359                 tail = slow(m, rest, stop, es, stopst);
  360                 if (tail == stop)
  361                     break;      /* yes! */
  362                 /* no -- try a shorter match for this one */
  363                 stp = rest - 1;
  364                 assert(stp >= sp);  /* it did work */
  365             }
  366             ssub = ss + 1;
  367             esub = ss + OPND(m->g->strip[ss]) - 1;
  368             assert(OP(m->g->strip[esub]) == OOR1);
  369             for (;;) {  /* find first matching branch */
  370                 if (slow(m, sp, rest, ssub, esub) == rest)
  371                     break;  /* it matched all of it */
  372                 /* that one missed, try next one */
  373                 assert(OP(m->g->strip[esub]) == OOR1);
  374                 esub++;
  375                 assert(OP(m->g->strip[esub]) == OOR2);
  376                 ssub = esub + 1;
  377                 esub += OPND(m->g->strip[esub]);
  378                 if (OP(m->g->strip[esub]) == OOR2)
  379                     esub--;
  380                 else
  381                     assert(OP(m->g->strip[esub]) == O_CH);
  382             }
  383             dp = dissect(m, sp, rest, ssub, esub);
  384             assert(dp == rest);
  385             sp = rest;
  386             break;
  387         case O_PLUS:
  388         case O_QUEST:
  389         case OOR1:
  390         case OOR2:
  391         case O_CH:
  392             assert(nope);
  393             break;
  394         case OLPAREN:
  395             i = OPND(m->g->strip[ss]);
  396             assert(0 < i && i <= m->g->nsub);
  397             m->pmatch[i].rm_so = sp - m->offp;
  398             break;
  399         case ORPAREN:
  400             i = OPND(m->g->strip[ss]);
  401             assert(0 < i && i <= m->g->nsub);
  402             m->pmatch[i].rm_eo = sp - m->offp;
  403             break;
  404         default:        /* uh oh */
  405             assert(nope);
  406             break;
  407         }
  408     }
  409 
  410     assert(sp == stop);
  411     return(sp);
  412 }
  413 
  414 /*
  415  - backref - figure out what matched what, figuring in back references
  416  == static char *backref(register struct match *m, char *start, \
  417  == char *stop, sopno startst, sopno stopst, sopno lev);
  418  */
  419 static char *           /* == stop (success) or NULL (failure) */
  420 backref(m, start, stop, startst, stopst, lev)
  421 register struct match *m;
  422 char *start;
  423 char *stop;
  424 sopno startst;
  425 sopno stopst;
  426 sopno lev;          /* PLUS nesting level */
  427 {
  428     register int i;
  429     register sopno ss;  /* start sop of current subRE */
  430     register char *sp;  /* start of string matched by it */
  431     register sopno ssub;    /* start sop of subsubRE */
  432     register sopno esub;    /* end sop of subsubRE */
  433     register char *ssp; /* start of string matched by subsubRE */
  434     register char *dp;
  435     register size_t len;
  436     register int hard;
  437     register sop s;
  438     register regoff_t offsave;
  439     register cset *cs;
  440 
  441     AT("back", start, stop, startst, stopst);
  442     sp = start;
  443 
  444     /* get as far as we can with easy stuff */
  445     hard = 0;
  446     for (ss = startst; !hard && ss < stopst; ss++)
  447         switch (OP(s = m->g->strip[ss])) {
  448         case OCHAR:
  449             if (sp == stop || *sp++ != (char)OPND(s))
  450                 return(NULL);
  451             break;
  452         case OANY:
  453             if (sp == stop)
  454                 return(NULL);
  455             sp++;
  456             break;
  457         case OANYOF:
  458             cs = &m->g->sets[OPND(s)];
  459             if (sp == stop || !CHIN(cs, *sp++))
  460                 return(NULL);
  461             break;
  462         case OBOL:
  463             if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
  464                     (sp < m->endp && *(sp-1) == '\n' &&
  465                         (m->g->cflags&REG_NEWLINE)) )
  466                 { /* yes */ }
  467             else
  468                 return(NULL);
  469             break;
  470         case OEOL:
  471             if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
  472                     (sp < m->endp && *sp == '\n' &&
  473                         (m->g->cflags&REG_NEWLINE)) )
  474                 { /* yes */ }
  475             else
  476                 return(NULL);
  477             break;
  478         case OBOW:
  479             if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
  480                     (sp < m->endp && *(sp-1) == '\n' &&
  481                         (m->g->cflags&REG_NEWLINE)) ||
  482                     (sp > m->beginp &&
  483                             !ISWORD(*(sp-1))) ) &&
  484                     (sp < m->endp && ISWORD(*sp)) )
  485                 { /* yes */ }
  486             else
  487                 return(NULL);
  488             break;
  489         case OEOW:
  490             if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
  491                     (sp < m->endp && *sp == '\n' &&
  492                         (m->g->cflags&REG_NEWLINE)) ||
  493                     (sp < m->endp && !ISWORD(*sp)) ) &&
  494                     (sp > m->beginp && ISWORD(*(sp-1))) )
  495                 { /* yes */ }
  496             else
  497                 return(NULL);
  498             break;
  499         case O_QUEST:
  500             break;
  501         case OOR1:  /* matches null but needs to skip */
  502             ss++;
  503             s = m->g->strip[ss];
  504             do {
  505                 assert(OP(s) == OOR2);
  506                 ss += OPND(s);
  507             } while (OP(s = m->g->strip[ss]) != O_CH);
  508             /* note that the ss++ gets us past the O_CH */
  509             break;
  510         default:    /* have to make a choice */
  511             hard = 1;
  512             break;
  513         }
  514     if (!hard) {        /* that was it! */
  515         if (sp != stop)
  516             return(NULL);
  517         return(sp);
  518     }
  519     ss--;           /* adjust for the for's final increment */
  520 
  521     /* the hard stuff */
  522     AT("hard", sp, stop, ss, stopst);
  523     s = m->g->strip[ss];
  524     switch (OP(s)) {
  525     case OBACK_:        /* the vilest depths */
  526         i = OPND(s);
  527         assert(0 < i && i <= m->g->nsub);
  528         if (m->pmatch[i].rm_eo == -1)
  529             return(NULL);
  530         assert(m->pmatch[i].rm_so != -1);
  531         len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
  532         assert(stop - m->beginp >= len);
  533         if (sp > stop - len)
  534             return(NULL);   /* not enough left to match */
  535         ssp = m->offp + m->pmatch[i].rm_so;
  536         if (memcmp(sp, ssp, len) != 0)
  537             return(NULL);
  538         while (m->g->strip[ss] != SOP(O_BACK, i))
  539             ss++;
  540         return(backref(m, sp+len, stop, ss+1, stopst, lev));
  541         break;
  542     case OQUEST_:       /* to null or not */
  543         dp = backref(m, sp, stop, ss+1, stopst, lev);
  544         if (dp != NULL)
  545             return(dp); /* not */
  546         return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
  547         break;
  548     case OPLUS_:
  549         assert(m->lastpos != NULL);
  550         assert(lev+1 <= m->g->nplus);
  551         m->lastpos[lev+1] = sp;
  552         return(backref(m, sp, stop, ss+1, stopst, lev+1));
  553         break;
  554     case O_PLUS:
  555         if (sp == m->lastpos[lev])  /* last pass matched null */
  556             return(backref(m, sp, stop, ss+1, stopst, lev-1));
  557         /* try another pass */
  558         m->lastpos[lev] = sp;
  559         dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
  560         if (dp == NULL)
  561             return(backref(m, sp, stop, ss+1, stopst, lev-1));
  562         else
  563             return(dp);
  564         break;
  565     case OCH_:      /* find the right one, if any */
  566         ssub = ss + 1;
  567         esub = ss + OPND(s) - 1;
  568         assert(OP(m->g->strip[esub]) == OOR1);
  569         for (;;) {  /* find first matching branch */
  570             dp = backref(m, sp, stop, ssub, esub, lev);
  571             if (dp != NULL)
  572                 return(dp);
  573             /* that one missed, try next one */
  574             if (OP(m->g->strip[esub]) == O_CH)
  575                 return(NULL);   /* there is none */
  576             esub++;
  577             assert(OP(m->g->strip[esub]) == OOR2);
  578             ssub = esub + 1;
  579             esub += OPND(m->g->strip[esub]);
  580             if (OP(m->g->strip[esub]) == OOR2)
  581                 esub--;
  582             else
  583                 assert(OP(m->g->strip[esub]) == O_CH);
  584         }
  585         break;
  586     case OLPAREN:       /* must undo assignment if rest fails */
  587         i = OPND(s);
  588         assert(0 < i && i <= m->g->nsub);
  589         offsave = m->pmatch[i].rm_so;
  590         m->pmatch[i].rm_so = sp - m->offp;
  591         dp = backref(m, sp, stop, ss+1, stopst, lev);
  592         if (dp != NULL)
  593             return(dp);
  594         m->pmatch[i].rm_so = offsave;
  595         return(NULL);
  596         break;
  597     case ORPAREN:       /* must undo assignment if rest fails */
  598         i = OPND(s);
  599         assert(0 < i && i <= m->g->nsub);
  600         offsave = m->pmatch[i].rm_eo;
  601         m->pmatch[i].rm_eo = sp - m->offp;
  602         dp = backref(m, sp, stop, ss+1, stopst, lev);
  603         if (dp != NULL)
  604             return(dp);
  605         m->pmatch[i].rm_eo = offsave;
  606         return(NULL);
  607         break;
  608     default:        /* uh oh */
  609         assert(nope);
  610         break;
  611     }
  612 
  613     /* "can't happen" */
  614     assert(nope);
  615     /* NOTREACHED */
  616     return((char *)NULL);   /* dummy */
  617 }
  618 
  619 /*
  620  - fast - step through the string at top speed
  621  == static char *fast(register struct match *m, char *start, \
  622  == char *stop, sopno startst, sopno stopst);
  623  */
  624 static char *           /* where tentative match ended, or NULL */
  625 fast(m, start, stop, startst, stopst)
  626 register struct match *m;
  627 char *start;
  628 char *stop;
  629 sopno startst;
  630 sopno stopst;
  631 {
  632     register states st = m->st;
  633     register states fresh = m->fresh;
  634     register states tmp = m->tmp;
  635     register char *p = start;
  636     register int c = (start == m->beginp) ? OUT : *(start-1);
  637     register int lastc; /* previous c */
  638     register int flagch;
  639     register int i;
  640     register char *coldp;   /* last p after which no match was underway */
  641 
  642     CLEAR(st);
  643     SET1(st, startst);
  644     st = step(m->g, startst, stopst, st, NOTHING, st);
  645     ASSIGN(fresh, st);
  646     SP("start", st, *p);
  647     coldp = NULL;
  648     for (;;) {
  649         /* next character */
  650         lastc = c;
  651         c = (p == m->endp) ? OUT : *p;
  652         if (EQ(st, fresh))
  653             coldp = p;
  654 
  655         /* is there an EOL and/or BOL between lastc and c? */
  656         flagch = '\0';
  657         i = 0;
  658         if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
  659                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
  660             flagch = BOL;
  661             i = m->g->nbol;
  662         }
  663         if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
  664                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
  665             flagch = (flagch == BOL) ? BOLEOL : EOL;
  666             i += m->g->neol;
  667         }
  668         if (i != 0) {
  669             for (; i > 0; i--)
  670                 st = step(m->g, startst, stopst, st, flagch, st);
  671             SP("boleol", st, c);
  672         }
  673 
  674         /* how about a word boundary? */
  675         if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
  676                     (c != OUT && ISWORD(c)) ) {
  677             flagch = BOW;
  678         }
  679         if ( (lastc != OUT && ISWORD(lastc)) &&
  680                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
  681             flagch = EOW;
  682         }
  683         if (flagch == BOW || flagch == EOW) {
  684             st = step(m->g, startst, stopst, st, flagch, st);
  685             SP("boweow", st, c);
  686         }
  687 
  688         /* are we done? */
  689         if (ISSET(st, stopst) || p == stop)
  690             break;      /* NOTE BREAK OUT */
  691 
  692         /* no, we must deal with this character */
  693         ASSIGN(tmp, st);
  694         ASSIGN(st, fresh);
  695         assert(c != OUT);
  696         st = step(m->g, startst, stopst, tmp, c, st);
  697         SP("aft", st, c);
  698         assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
  699         p++;
  700     }
  701 
  702     assert(coldp != NULL);
  703     m->coldp = coldp;
  704     if (ISSET(st, stopst))
  705         return(p+1);
  706     else
  707         return(NULL);
  708 }
  709 
  710 /*
  711  - slow - step through the string more deliberately
  712  == static char *slow(register struct match *m, char *start, \
  713  == char *stop, sopno startst, sopno stopst);
  714  */
  715 static char *           /* where it ended */
  716 slow(m, start, stop, startst, stopst)
  717 register struct match *m;
  718 char *start;
  719 char *stop;
  720 sopno startst;
  721 sopno stopst;
  722 {
  723     register states st = m->st;
  724     register states empty = m->empty;
  725     register states tmp = m->tmp;
  726     register char *p = start;
  727     register int c = (start == m->beginp) ? OUT : *(start-1);
  728     register int lastc; /* previous c */
  729     register int flagch;
  730     register int i;
  731     register char *matchp;  /* last p at which a match ended */
  732 
  733     AT("slow", start, stop, startst, stopst);
  734     CLEAR(st);
  735     SET1(st, startst);
  736     SP("sstart", st, *p);
  737     st = step(m->g, startst, stopst, st, NOTHING, st);
  738     matchp = NULL;
  739     for (;;) {
  740         /* next character */
  741         lastc = c;
  742         c = (p == m->endp) ? OUT : *p;
  743 
  744         /* is there an EOL and/or BOL between lastc and c? */
  745         flagch = '\0';
  746         i = 0;
  747         if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
  748                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
  749             flagch = BOL;
  750             i = m->g->nbol;
  751         }
  752         if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
  753                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
  754             flagch = (flagch == BOL) ? BOLEOL : EOL;
  755             i += m->g->neol;
  756         }
  757         if (i != 0) {
  758             for (; i > 0; i--)
  759                 st = step(m->g, startst, stopst, st, flagch, st);
  760             SP("sboleol", st, c);
  761         }
  762 
  763         /* how about a word boundary? */
  764         if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
  765                     (c != OUT && ISWORD(c)) ) {
  766             flagch = BOW;
  767         }
  768         if ( (lastc != OUT && ISWORD(lastc)) &&
  769                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
  770             flagch = EOW;
  771         }
  772         if (flagch == BOW || flagch == EOW) {
  773             st = step(m->g, startst, stopst, st, flagch, st);
  774             SP("sboweow", st, c);
  775         }
  776 
  777         /* are we done? */
  778         if (ISSET(st, stopst))
  779             matchp = p;
  780         if (EQ(st, empty) || p == stop)
  781             break;      /* NOTE BREAK OUT */
  782 
  783         /* no, we must deal with this character */
  784         ASSIGN(tmp, st);
  785         ASSIGN(st, empty);
  786         assert(c != OUT);
  787         st = step(m->g, startst, stopst, tmp, c, st);
  788         SP("saft", st, c);
  789         assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
  790         p++;
  791     }
  792 
  793     return(matchp);
  794 }
  795 
  796 
  797 /*
  798  - step - map set of states reachable before char to set reachable after
  799  == static states step(register struct re_guts *g, sopno start, sopno stop, \
  800  == register states bef, int ch, register states aft);
  801  == #define BOL (OUT+1)
  802  == #define EOL (BOL+1)
  803  == #define BOLEOL  (BOL+2)
  804  == #define NOTHING (BOL+3)
  805  == #define BOW (BOL+4)
  806  == #define EOW (BOL+5)
  807  == #define CODEMAX (BOL+5)     // highest code used
  808  == #define NONCHAR(c)  ((c) > CHAR_MAX)
  809  == #define NNONCHAR    (CODEMAX-CHAR_MAX)
  810  */
  811 static states
  812 step(g, start, stop, bef, ch, aft)
  813 register struct re_guts *g;
  814 sopno start;            /* start state within strip */
  815 sopno stop;         /* state after stop state within strip */
  816 register states bef;        /* states reachable before */
  817 int ch;             /* character or NONCHAR code */
  818 register states aft;        /* states already known reachable after */
  819 {
  820     register cset *cs;
  821     register sop s;
  822     register sopno pc;
  823     register onestate here;     /* note, macros know this name */
  824     register sopno look;
  825     register long i;
  826 
  827     for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
  828         s = g->strip[pc];
  829         switch (OP(s)) {
  830         case OEND:
  831             assert(pc == stop-1);
  832             break;
  833         case OCHAR:
  834             /* only characters can match */
  835             assert(!NONCHAR(ch) || ch != (char)OPND(s));
  836             if (ch == (char)OPND(s))
  837                 FWD(aft, bef, 1);
  838             break;
  839         case OBOL:
  840             if (ch == BOL || ch == BOLEOL)
  841                 FWD(aft, bef, 1);
  842             break;
  843         case OEOL:
  844             if (ch == EOL || ch == BOLEOL)
  845                 FWD(aft, bef, 1);
  846             break;
  847         case OBOW:
  848             if (ch == BOW)
  849                 FWD(aft, bef, 1);
  850             break;
  851         case OEOW:
  852             if (ch == EOW)
  853                 FWD(aft, bef, 1);
  854             break;
  855         case OANY:
  856             if (!NONCHAR(ch))
  857                 FWD(aft, bef, 1);
  858             break;
  859         case OANYOF:
  860             cs = &g->sets[OPND(s)];
  861             if (!NONCHAR(ch) && CHIN(cs, ch))
  862                 FWD(aft, bef, 1);
  863             break;
  864         case OBACK_:        /* ignored here */
  865         case O_BACK:
  866             FWD(aft, aft, 1);
  867             break;
  868         case OPLUS_:        /* forward, this is just an empty */
  869             FWD(aft, aft, 1);
  870             break;
  871         case O_PLUS:        /* both forward and back */
  872             FWD(aft, aft, 1);
  873             i = ISSETBACK(aft, OPND(s));
  874             BACK(aft, aft, OPND(s));
  875             if (!i && ISSETBACK(aft, OPND(s))) {
  876                 /* oho, must reconsider loop body */
  877                 pc -= OPND(s) + 1;
  878                 INIT(here, pc);
  879             }
  880             break;
  881         case OQUEST_:       /* two branches, both forward */
  882             FWD(aft, aft, 1);
  883             FWD(aft, aft, OPND(s));
  884             break;
  885         case O_QUEST:       /* just an empty */
  886             FWD(aft, aft, 1);
  887             break;
  888         case OLPAREN:       /* not significant here */
  889         case ORPAREN:
  890             FWD(aft, aft, 1);
  891             break;
  892         case OCH_:      /* mark the first two branches */
  893             FWD(aft, aft, 1);
  894             assert(OP(g->strip[pc+OPND(s)]) == OOR2);
  895             FWD(aft, aft, OPND(s));
  896             break;
  897         case OOR1:      /* done a branch, find the O_CH */
  898             if (ISSTATEIN(aft, here)) {
  899                 for (look = 1;
  900                         OP(s = g->strip[pc+look]) != O_CH;
  901                         look += OPND(s))
  902                     assert(OP(s) == OOR2);
  903                 FWD(aft, aft, look);
  904             }
  905             break;
  906         case OOR2:      /* propagate OCH_'s marking */
  907             FWD(aft, aft, 1);
  908             if (OP(g->strip[pc+OPND(s)]) != O_CH) {
  909                 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
  910                 FWD(aft, aft, OPND(s));
  911             }
  912             break;
  913         case O_CH:      /* just empty */
  914             FWD(aft, aft, 1);
  915             break;
  916         default:        /* ooooops... */
  917             assert(nope);
  918             break;
  919         }
  920     }
  921 
  922     return(aft);
  923 }
  924 
  925 #ifdef REDEBUG
  926 /*
  927  - print - print a set of states
  928  == #ifdef REDEBUG
  929  == static void print(struct match *m, char *caption, states st, \
  930  == int ch, FILE *d);
  931  == #endif
  932  */
  933 static void
  934 print(m, caption, st, ch, d)
  935 struct match *m;
  936 char *caption;
  937 states st;
  938 int ch;
  939 FILE *d;
  940 {
  941     register struct re_guts *g = m->g;
  942     register int i;
  943     register int first = 1;
  944 
  945     if (!(m->eflags&REG_TRACE))
  946         return;
  947 
  948     fprintf(d, "%s", caption);
  949     if (ch != '\0')
  950         fprintf(d, " %s", pchar(ch));
  951     for (i = 0; i < g->nstates; i++)
  952         if (ISSET(st, i)) {
  953             fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
  954             first = 0;
  955         }
  956     fprintf(d, "\n");
  957 }
  958 
  959 /* 
  960  - at - print current situation
  961  == #ifdef REDEBUG
  962  == static void at(struct match *m, char *title, char *start, char *stop, \
  963  ==                     sopno startst, sopno stopst);
  964  == #endif
  965  */
  966 static void
  967 at(m, title, start, stop, startst, stopst)
  968 struct match *m;
  969 char *title;
  970 char *start;
  971 char *stop;
  972 sopno startst;
  973 sopno stopst;
  974 {
  975     if (!(m->eflags&REG_TRACE))
  976         return;
  977 
  978     printf("%s %s-", title, pchar(*start));
  979     printf("%s ", pchar(*stop));
  980     printf("%ld-%ld\n", (long)startst, (long)stopst);
  981 }
  982 
  983 #ifndef PCHARDONE
  984 #define PCHARDONE   /* never again */
  985 /*
  986  - pchar - make a character printable
  987  == #ifdef REDEBUG
  988  == static char *pchar(int ch);
  989  == #endif
  990  *
  991  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
  992  * duplicate here avoids having a debugging-capable regexec.o tied to
  993  * a matching debug.o, and this is convenient.  It all disappears in
  994  * the non-debug compilation anyway, so it doesn't matter much.
  995  */
  996 static char *           /* -> representation */
  997 pchar(ch)
  998 int ch;
  999 {
 1000     static char pbuf[10];
 1001 
 1002     if (isprint(ch) || ch == ' ')
 1003         sprintf(pbuf, "%c", ch);
 1004     else
 1005         sprintf(pbuf, "\\%o", ch);
 1006     return(pbuf);
 1007 }
 1008 #endif
 1009 #endif
 1010 
 1011 #undef  matcher
 1012 #undef  fast
 1013 #undef  slow
 1014 #undef  dissect
 1015 #undef  backref
 1016 #undef  step
 1017 #undef  print
 1018 #undef  at
 1019 #undef  match