"Fossies" - the Fresh Open Source Software Archive

Member "replace-2.24/auto.c" (19 Jun 2004, 4372 Bytes) of package /linux/privat/old/replace-2.24-src-11.11.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /* auto.c - The automatic binary vs. text detecting code
    2    (C) Richard K. Lloyd 2001-2004
    3 */
    4 
    5 #define Extern extern
    6 #include "replace.h"
    7 
    8 /* Binary codes array - determines which of the 256 possible char
    9    values are binary bytes (would never appear in a text file). This is
   10    complicated by the existence of 8-bit chars of course - if people want
   11    to strongly argue that a char is text and not binary (or vice versa),
   12    e-mail replace@richardlloyd.org.uk and we'll discuss flipping that byte's
   13    status here in a future release. For the moment, I'm sticking to 7-bit
   14    viewable chars, plus selected 8-bit chars that annoying Word users drop
   15    happily into their "text" files when they convert them into text or HTML */
   16 
   17 static int bincodes[256]=
   18 /* 0 = It's a text char, 1 it's a binary char */
   19 {
   20    /* Control codes first, most of which are binary */
   21    /* 000-007: */ 1, 1, 1, 1, 1, 1, 1, 1,
   22    /* 9 is a tab character, 10 is a line feed and 13 is carriage return */
   23    /* 008-015: */ 1, 0, 0, 1, 1, 0, 1, 1,
   24    /* 016-023: */ 1, 1, 1, 1, 1, 1, 1, 1,
   25    /* Idiotic DOS text files use CTRL-Z (26) to terminate ! */
   26    /* 024-031: */ 1, 1, 0, 1, 1, 1, 1, 1,
   27    /* We're now into text chars (32=space through to 126=tilde) */
   28    /* 032-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   29    /* 040-039: */ 0, 0, 0, 0, 0, 0, 0, 0, 
   30    /* 048-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   31    /* 056-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   32    /* 064-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   33    /* 072-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   34    /* 080-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   35    /* 088-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   36    /* 096-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   37    /* 104-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   38    /* 112-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   39    /* 127 = delete */
   40    /* 120-127: */ 0, 0, 0, 0, 0, 0, 0, 1,
   41    /* 8-bit chars now - most are binary */
   42    /* 128 = Word space */
   43    /* 128-135: */ 0, 1, 1, 1, 1, 1, 1, 1, 
   44    /* 136-143: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   45    /* 145 and 146 = Word apostrophe, 148 and 150 = Word dash */
   46    /* 144-151: */ 1, 0, 0, 1, 0, 1, 0, 1, 
   47    /* 152-159: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   48    /* 163 = Pound sterling */
   49    /* 160-167: */ 1, 1, 1, 0, 1, 1, 1, 1, 
   50    /* 168-175: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   51    /* 178 and 179 = Word double quote */
   52    /* 176-183: */ 1, 1, 0, 0, 1, 1, 1, 1, 
   53    /* 185 = Word apostrophe */
   54    /* 184-191: */ 1, 0, 1, 1, 1, 1, 1, 1, 
   55    /* 192-199: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   56    /* 200-207: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   57    /* 208-215: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   58    /* 216-223: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   59    /* 226 = Word space */
   60    /* 224-231: */ 1, 1, 0, 1, 1, 1, 1, 1, 
   61    /* 232-239: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   62    /* 240-247: */ 1, 1, 1, 1, 1, 1, 1, 1, 
   63    /* 248-255: */ 1, 1, 1, 1, 1, 1, 1, 1
   64 };
   65 
   66 #ifdef __STDC__
   67 int is_binary(FILE *fhand)
   68 #else
   69 int is_binary(fhand)
   70 FILE *fhand;
   71 #endif
   72 {
   73    /* Given a freshly opened input file (with file pointer at start of
   74       file), determine if any of the first X bytes [X = length of file or
   75       256, whichever is the smaller] contain any binary codes. Return 1 if
   76       they do, otherwise return 0. If there's an error, issue a warning and
   77       return 0 for text.
   78    */
   79    int retval=0;
   80    if (autodetect)
   81    {
   82       binchunkptr=alloc_mem(binchunkptr,&binchunksize,MAX_BIN_BYTES);
   83       autobinsize=fread((void *)binchunkptr,1,MAX_BIN_BYTES,fhand);
   84       if (ferror(fhand))
   85          (void)fprintf(stderr,"WARNING: Input unreadable (assuming text data)\n");
   86       else
   87       if (autobinsize)
   88       {
   89          /* In theory, we could "rewind(fhand);" here to return the open
   90             file pointer back to the start of the file. Sadly, however, it
   91             doesn't work for stdin, so we have to re-use the binchunkptr
   92             buffer when we read the file for real. This is easy for binary
   93             reads (there's a start offset in the routine, so we set it to
   94             autobinsize), but tricky for the text reads (fgets() has to be
   95             simulated - see replace_fgets() in text.c) */
   96          size_t rloop;
   97          for (rloop=0;rloop<autobinsize && !retval;rloop++)
   98          if (bincodes[(unsigned char)binchunkptr[rloop]]) retval=1;
   99       }
  100    }
  101    else
  102    {
  103       retval=binary;
  104       autobinsize=0;
  105    }
  106    if (retval) (void)strcpy(filetype,"binary file");
  107    else (void)strcpy(filetype,"text file");
  108    autobinread=0; /* No bytes read from auto-detect buffer yet */
  109    return(retval);
  110 }