"Fossies" - the Fresh Open Source Software Archive

Member "schily-2021-09-18/match/match.c" (20 Aug 2021, 12567 Bytes) of package /linux/privat/schily-2021-09-18.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /* @(#)match.c  1.41 21/08/20 Copyright 1985-2021 J. Schilling */
    2 #include <schily/mconfig.h>
    3 #ifndef lint
    4 static  UConst char sccsid[] =
    5     "@(#)match.c    1.41 21/08/20 Copyright 1985-2021 J. Schilling";
    6 #endif
    7 /*
    8  *  search file(s) for a pattern
    9  *
   10  *  Copyright (c) 1985-2021 J. Schilling
   11  */
   12 /*
   13  * The contents of this file are subject to the terms of the
   14  * Common Development and Distribution License, Version 1.0 only
   15  * (the "License").  You may not use this file except in compliance
   16  * with the License.
   17  *
   18  * See the file CDDL.Schily.txt in this distribution for details.
   19  * A copy of the CDDL is also available via the Internet at
   20  * http://www.opensource.org/licenses/cddl1.txt
   21  *
   22  * When distributing Covered Code, include this CDDL HEADER in each
   23  * file and include the License file CDDL.Schily.txt from this distribution.
   24  */
   25 
   26 #include <schily/stdio.h>
   27 #include <schily/stdlib.h>
   28 #include <schily/unistd.h>  /* Include sys/types.h to make off_t available */
   29 #include <schily/string.h>
   30 #include <schily/utypes.h>
   31 #include <schily/patmatch.h>
   32 #include <schily/standard.h>
   33 #include <schily/ctype.h>
   34 #define GT_COMERR       /* #define comerr gtcomerr */
   35 #define GT_ERROR        /* #define error gterror   */
   36 #include <schily/schily.h>
   37 #include <schily/nlsdefs.h>
   38 
   39 #define UC  (unsigned char *)
   40 
   41 #ifndef HAVE_VALLOC
   42 #define valloc(a)   malloc(a)
   43 #endif
   44 
   45 #define BUFSIZE 8192
   46 #define MAXLINE 8192
   47 
/*
 * Pattern meta characters (the constants are presumably provided by
 * <schily/patmatch.h>).  The list is 0-terminated so that issimple()
 * can search it with strchr().
 */
   48 LOCAL   char    mchars[] = { ALT, REP, NIL, STAR, LBRACK, RBRACK,
   49             LCLASS, RCLASS, QUOTE, ANY, START, END,
   50             0,
   51 };
   52 LOCAL   char    notletter[] = "{^!$![^_A-Za-z0-9]}";  /* put before and after the pattern for -w */
   53 
   54 LOCAL   char    *buf;           /* one allocation: line buffer followed by read buffer */
   55 LOCAL   int rblen;          /* read buffer len (set up in main) */
   56 LOCAL   int linelen;        /* line buffer len (set up in main) */
   57 #define rbuf    (&buf[linelen])     /* read buffer: starts behind the line buffer */
   58 #define line    (&buf[0])       /* line buffer: first linelen bytes of buf */
   59 LOCAL   char    *lcasebuf;      /* low case copy of the line, used with -i */
   60 
   61 LOCAL   int notflag = 0;    /* -not/-v: select lines that do not match */
   62 LOCAL   int igncase = 0;    /* -i: ignore the case of letters */
   63 LOCAL   int magic = 0;      /* -M: force the magic (pattern) mode */
   64 LOCAL   int nomagic = 0;    /* -m: force not to use the magic mode */
   65 LOCAL   int wordflag = 0;   /* -w: search for the pattern as a word */
   66 LOCAL   int xflag = 0;      /* -x: the whole line has to match */
   67 LOCAL   int cntflag = 0;    /* -c: print a match count per file */
   68 LOCAL   int lflag = 0;      /* -l: print the names of matching files */
   69 LOCAL   int Lflag = 0;      /* -L: print only the first matching line */
   70 LOCAL   int Vflag = 0;      /* -V: print the names of files without match */
   71 LOCAL   int sflag = 0;      /* -s: silent, also set by -V in main() */
   72 LOCAL   int hflag = 0;      /* -h: no file name prefix, also set for stdin */
   73 LOCAL   int nflag = 0;      /* -n: prefix lines with the line number */
   74 LOCAL   int bflag = 0;      /* -b: prefix lines with the block number */
   75 LOCAL   int debug = 0;      /* -d: call printpat() before searching */
   76 LOCAL   int dosimple = 0;   /* use smatch() instead of pmatch() */
   77 
   78 LOCAL   void    usage       __PR((int exitcode));
   79 EXPORT  int main        __PR((int ac, char **av));
   80 LOCAL   int domatch     __PR((FILE *file, char *name, char *pat, int plen, int *aux, int alt, int *state));
   81 LOCAL   void    strlower    __PR((char *s, int slen));
   82 
   83 LOCAL   BOOL    issimple    __PR((char *p));
   84 LOCAL   int smatch      __PR((char *linep, int llen, char *pat, int plen));
   85 LOCAL   void    printpat    __PR((char *pat, int plen, int alt, int *aux));
   86 LOCAL   BOOL    pmatch      __PR((char *linep, int llen, char *pat, int *aux, int alt, int *state));
   87 
   88 LOCAL void
   89 usage(exitcode)
   90     int exitcode;
   91 {
   92     error("Usage:   match [options] pattern [file1...filen]\n");
   93     error("Options:\n");
   94     error(" -not,-v Print all lines that do not match\n");
   95     error(" -i  Ignore the case of letters\n");
   96     error(" -m  Force not to use the magic mode\n");
   97     error(" -M  Force to use the magic mode\n");
   98     error(" -w  Search for pattern as a word\n");
   99     error(" -x  Display only those lines which match exactly\n");
  100     error(" -c  Display matching count for each file\n");
  101     error(" -V  Display name of each file whith no matches\n");
  102     error(" -l  Display name of each file which matches\n");
  103     error(" -L  Display first matching line of each file which matches\n");
  104     error(" -s  Be silent indicate match in exitcode\n");
  105     error(" -h  Do not display filenames\n");
  106     error(" -n  Precede matching lines with line number\n");
  107     error(" -b  Precede matching lines with block number\n");
  108     error(" -help   Print this help.\n");
  109     error(" -version Print version number.\n");
  110     error(" Standard in is used if no files are specified.\n");
  111     exit(exitcode);
  112 }
  113 
  114 EXPORT int
  115 main(ac, av)
  116     int ac;
  117     char **av;
  118 {
  119     FILE *f;
  120     char *pat;
  121     int *aux   = NULL;
  122     int *state = NULL;
  123     int alt = 0;
  124     char *options = "not,v,V,i,M,m,w,x,c,l,L,s,h,n,b,help,version,d";
  125     int help = 0;
  126     int cac     = ac;
  127     char    * const *cav    = av;
  128     char    *name;
  129     int plen;
  130     int matches;
  131     int anymatch = 0;
  132     BOOL    prversion = 0;
  133 
  134     save_args(ac, av);
  135 
  136     (void) setlocale(LC_ALL, "");
  137 
  138 #ifdef  USE_NLS
  139 #if !defined(TEXT_DOMAIN)   /* Should be defined by cc -D */
  140 #define TEXT_DOMAIN "match" /* Use this only if it weren't */
  141 #endif
  142     { char  *dir;
  143     dir = searchfileinpath("share/locale", F_OK,
  144                     SIP_ANY_FILE|SIP_NO_PATH, NULL);
  145     if (dir)
  146         (void) bindtextdomain(TEXT_DOMAIN, dir);
  147     else
  148 #if defined(PROTOTYPES) && defined(INS_BASE)
  149     (void) bindtextdomain(TEXT_DOMAIN, INS_BASE "/share/locale");
  150 #else
  151     (void) bindtextdomain(TEXT_DOMAIN, "/usr/share/locale");
  152 #endif
  153     (void) textdomain(TEXT_DOMAIN);
  154     }
  155 #endif  /* USE_NLS */
  156 
  157     if (getallargs(&cac, &cav, options,
  158             &notflag, &notflag,
  159             &Vflag,
  160             &igncase,
  161             &magic,
  162             &nomagic,
  163             &wordflag,
  164             &xflag,
  165             &cntflag, &lflag, &Lflag, &sflag,
  166             &hflag, &nflag, &bflag, &help, &prversion,
  167             &debug) < 0) {
  168         errmsgno(EX_BAD, "Bad flag: %s.\n", cav[0]);
  169         usage(EX_BAD);
  170     }
  171     if (help)
  172         usage(0);
  173     if (prversion) {
  174         gtprintf("Match release %s (%s-%s-%s) Copyright (C) 1985-2021 %s\n",
  175                 "1.41",
  176                 HOST_CPU, HOST_VENDOR, HOST_OS,
  177                 _("Jörg Schilling"));
  178         exit(0);
  179     }
  180 
  181     if (Vflag)
  182         sflag++;    /* Be silent while searching */
  183 
  184     cac = ac;
  185     cav = av;
  186     cac--, cav++;
  187     if (getfiles(&cac, &cav, options) <= 0) {
  188         errmsgno(EX_BAD, "No pattern given.\n");
  189         usage(EX_BAD);
  190     }
  191     pat = cav[0];
  192     cac--, cav++;
  193 
  194     plen = strlen(pat);
  195     if (magic)
  196         nomagic = 0;
  197     if (wordflag) {
  198         if (nomagic)
  199             comerrno(EX_BAD,
  200                 "Cannot match words in nomagic mode.\n");
  201         plen += 2 * (sizeof (notletter) - 1);
  202         if ((name = malloc(plen+1)) == NULL)
  203             comerrno(EX_BAD, "No memory for pattern");
  204         strcatl(name, notletter, pat, notletter, (char *)NULL);
  205         pat = name;
  206     }
  207     if (igncase)
  208         strlower(pat, plen);
  209     if (nomagic || (!magic && issimple(pat))) {
  210         dosimple = TRUE;
  211     } else {
  212         aux = malloc(sizeof (int)*plen);
  213         state = malloc(sizeof (int)*(plen+1));
  214         if (aux == NULL || state == NULL)
  215             comerrno(EX_BAD, "No memory for pattern compiler.");
  216 
  217         if ((alt = patcompile(UC pat, plen, aux)) == 0)
  218             comerrno(EX_BAD, "Bad pattern: '%s'.\n", pat);
  219     }
  220     if (debug)
  221         printpat(pat, plen, alt, aux);
  222 
  223     while (rblen < BUFSIZE)
  224         rblen += getpagesize();
  225     while (linelen < MAXLINE)
  226         linelen += getpagesize();
  227 
  228     buf = valloc(linelen+rblen);
  229     lcasebuf = valloc(linelen);
  230     if (buf == NULL || lcasebuf == NULL)
  231         comerr("No memory for read buffer.\n");
  232 
  233     if (getfiles(&cac, &cav, options) <= 0) {   /* match stdin */
  234         name = "stdin";
  235         hflag++;
  236 #ifdef  _FASCII     /* Mark Williams C  */
  237         stdin->_ff &= ~_FASCII;
  238 #endif
  239         if ((matches = domatch(stdin, name, pat, plen, aux, alt, state)) != 0)
  240             anymatch++;
  241         if (cntflag)
  242             printf("%s:%d\n", name, matches);
  243         else if (Vflag && !matches)
  244             printf("%s\n", name);
  245         else if (lflag && matches)
  246             printf("%s\n", name);
  247     } else for (; getfiles(&cac, &cav, options); cac--, cav++) {
  248         name = cav[0];
  249         f = fileopen(name, "ru");
  250         if (f == NULL)
  251             errmsg("Cannot open '%s'.\n", name);
  252         else {
  253 #ifdef  _FASCII     /* Mark Williams C  */
  254             f->_ff &= ~_FASCII;
  255 #endif
  256             file_raise(f, FALSE);
  257             if ((matches = domatch(f, name, pat, plen, aux, alt, state)) != 0)
  258                 anymatch++;
  259             fclose(f);
  260             if (cntflag)
  261                 printf("%s:%d\n", name, matches);
  262             else if (Vflag && !matches)
  263                 printf("%s\n", name);
  264             else if (lflag && matches)
  265                 printf("%s\n", name);
  266         }
  267     }
  268     exit(anymatch ? 0 : 1);
  269     return (anymatch ? 0 : 1);  /* Keep lint happy */
  270 }
  271 
  272 /*
  273  * Search one file for a pattern.
  274  */
  275 LOCAL int
  276 domatch(f, name, pat, plen, aux, alt, state)
  277     register FILE *f;
  278     char *name;
  279     char *pat;
  280     register int plen;
  281     int *aux;
  282     int alt;
  283     int *state;
  284 {
  285     register char *linep;       /* pointer to fill up line */
  286     register char *pbuf = rbuf; /* pointer to read buffer */
  287     register int lbuf;      /* chars in read buffer */
  288     register int llen;
  289     register char c;        /* temp */
  290     off_t total = 0;        /* total number of bytes read */
  291     int lineno = 0;         /* current line number */
  292     int matches = 0;        /* current match count */
  293     BOOL matched = TRUE;        /* last line has match */
  294     BOOL eof = FALSE;
  295     int nl = 0;         /* line has nl */
  296     int r;
  297 
  298     lbuf = 0;
  299     for (;;) {
  300         if (!matched && !eof && nl == 0 && plen > 1) {
  301             /*
  302              * If we are going to continue matching and the last
  303              * match was for a long line (llen > linelen) then
  304              * move the unmatched part of our line buffer to the
  305              * beginning.
  306              */
  307             linep = movebytes(line-plen+linelen+1, line, plen-1);
  308             llen = linelen+1-plen;
  309         } else {
  310             /*
  311              * Start filling up a new line.
  312              */
  313             linep = line;
  314             llen = linelen;
  315         }
  316         matched = FALSE;
  317         nl = 0;
  318         for (;;) {
  319             if (--lbuf < 0) {
  320                 lbuf = ffileread(f, rbuf, rblen);
  321                 if (lbuf < 0) {
  322                     /*
  323                      * This may happen on NFS-mounted
  324                      * directories or OS that do not allow
  325                      * to read(2) directories, so we have
  326                      * to tolerate it.
  327                      */
  328                     errmsg("Cannot read '%s'.\n", name);
  329                     return (matches);
  330                 }
  331                 if (lbuf == 0) {    /* read hit EOF */
  332                     eof = TRUE;
  333                     if (linep != line)
  334                         break;
  335                     else
  336                         return (matches);
  337                 }
  338                 pbuf = rbuf;
  339                 total += lbuf;
  340                 lbuf--;
  341             }
  342             if ((c = *pbuf++) == '\n') {
  343                 nl = 1;
  344                 lineno++;
  345                 break;
  346             }
  347             if (--llen >= 0) {
  348                 *linep++ = c;
  349             } else {
  350                 lbuf++;
  351                 pbuf--;
  352                 break;
  353             }
  354         }
  355         /**plin = 0;*/
  356         llen = llen < 0 ? linelen : linelen - llen;
  357 
  358         if ((r = dosimple   ? smatch(line, llen, pat, plen)
  359                     : pmatch(line, llen, pat, aux, alt, state)) != 0) {
  360             if (notflag)
  361                 continue;
  362         } else {
  363             if (!notflag)
  364                 continue;
  365         }
  366         matches++;
  367         matched = TRUE;
  368         if (lflag)
  369             return (1);
  370         if (cntflag || sflag)
  371             continue;
  372         if (name && !hflag)
  373             printf("%s:", name);
  374         if (nflag)
  375             printf("%d:", lineno);
  376         if (bflag)
  377             printf("%lld:", (Llong)((total-lbuf-r-nl)/512));
  378         (void) filewrite(stdout, line, llen);
  379         putchar('\n');
  380         flush();
  381         if (Lflag)
  382             return (1);
  383     }
  384 }
  385 
  386 /*
  387  * Convert a string in place to lower case.
  388  */
  389 LOCAL void
  390 strlower(s, slen)
  391     register char   *s;
  392     register int    slen;
  393 {
  394     register Uchar  c;
  395 
  396     while (--slen >= 0) {
  397         c = (Uchar)*s;
  398         if (isupper(c))
  399             *s = (char)tolower(c);
  400         s++;
  401     }
  402 }
  403 
  404 /*
  405  * Check whether the pattern only has non-magic chars.
  406  */
  407 LOCAL BOOL
  408 issimple(p)
  409     register char *p;
  410 {
  411     while (*p) {
  412         if (strchr(mchars, *p++))
  413             return (FALSE);
  414     }
  415     return (TRUE);
  416 }
  417 
  418 /*
  419  * Simple (non regular expression) match.
  420  *
  421  * Check one line (or the buffer if no newline was found) for matches.
  422  */
  423 LOCAL int
  424 smatch(linep, llen, pat, plen)
  425     register char   *linep;
  426     register int    llen;
  427         char    *pat;
  428         int plen;
  429 {
  430     register char   *lp;        /* Line pointer     */
  431     register char   *pp;        /* Pattern pointer  */
  432     register char   *rpat = pat;
  433     register char   c = *pat;
  434 
  435     if (igncase) {
  436         movebytes(linep, lcasebuf, llen);
  437         strlower(linep = lcasebuf, llen);
  438     }
  439 #ifdef  MDEBUG
  440     printf("llen0 %d %.*s\n", llen, llen, linep);
  441 #endif
  442     if (xflag) {
  443         if (llen == 0)
  444             return (*rpat == '\0');
  445         if (llen != plen)
  446             return (0);
  447         for (lp = linep, pp = rpat; --llen >= 0; )
  448             if (*lp++ != *pp++)
  449                 return (0);
  450         return (1);
  451 
  452         /* CSTYLED */
  453     } else for (llen -= plen-2; --llen > 0; ) {
  454 #ifdef  MDEBUG
  455         printf("llen1 %d %.*s\n", llen, llen, linep);
  456 #endif
  457         /*
  458          * With a linelength of 16 and above, findbytes() is faster
  459          */
  460         if (llen < 16) {
  461             while (llen > 0 && *linep != c) {
  462                 linep++;
  463                 llen--;
  464             }
  465             if (llen <= 0)
  466                 return (0);
  467         } else {
  468             lp = findbytes(linep, llen, c);
  469             if (lp == NULL)
  470                 return (0);
  471             llen -= lp - linep;
  472             linep = lp;
  473         }
  474 #ifdef  MDEBUG
  475         printf("llen2 %d %.*s\n", llen, llen, linep);
  476 #endif
  477 
  478         for (lp = linep++, pp = rpat; ; )
  479             if (*pp == 0)
  480                 return (llen+plen);
  481 #ifdef  __needed__
  482             else if (*lp == 0)
  483                 return (0);
  484 #endif
  485             else if (*pp++ != *lp++)
  486                 break;
  487     }
  488     return (0);
  489 }
  490 
  491 LOCAL void
  492 printpat(pat, plen, alt, aux)
  493     char    *pat;
  494     int plen;
  495     int alt;
  496     int aux[];
  497 {
  498     register int    i;
  499 
  500     printf("pattern: '%s'.\n", pat);
  501     printf("patlen : %d.\n", plen);
  502     if (!dosimple) {
  503         printf("alt    : %d.\n", alt);
  504         printf("aux    :");
  505         for (i = 0; i < plen; i++)
  506             printf(" %d", aux[i]);
  507         printf(".\n");
  508     }
  509 }
  510 
  511 /*
  512  * Pattern (using regular expressions) match.
  513  *
  514  * Check one line (or the buffer if no newline was found) for matches.
  515  */
  516 LOCAL BOOL
  517 pmatch(linep, llen, pat, aux, alt, state)
  518     char    *linep;
  519     int llen;
  520     char    *pat;
  521     int *aux;
  522     int alt;
  523     int *state;
  524 {
  525     if (igncase) {
  526         movebytes(linep, lcasebuf, llen);
  527         strlower(linep = lcasebuf, llen);
  528     }
  529     if (xflag) {
  530         return (((long)((char *)patmatch(UC pat, aux, UC linep, 0, llen, alt, state) - linep))
  531                                 == llen);
  532     } else {
  533         return (patlmatch(UC pat, aux, UC linep, 0, llen, alt, state) != 0);
  534     }
  535 }