"Fossies" - the Fresh Open Source Software Archive 
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /* auto.c - The automatic binary vs. text detecting code
2 (C) Richard K. Lloyd 2001-2004
3 */
4
5 #define Extern extern
6 #include "replace.h"
7
8 /* Binary codes array - determines which of the 256 possible char
9 values are binary bytes (would never appear in a text file). This is
10 complicated by the existence of 8-bit chars of course - if people want
11 to strongly argue that a char is text and not binary (or vice versa),
12 e-mail replace@richardlloyd.org.uk and we'll discuss flipping that byte's
13 status here in a future release. For the moment, I'm sticking to 7-bit
14 viewable chars, plus selected 8-bit chars that annoying Word users drop
15 happily into their "text" files when they convert them into text or HTML */
16
/* bincodes - per-byte classification table used by the auto-detect code.
   Indexed by an unsigned byte value (0-255):
      0 = the byte is treated as a text char
      1 = the byte is treated as a binary char
   The per-row labels give the inclusive range of byte values on that row.
   The "Word" notes are the original author's description of the 8-bit
   values that are deliberately allowed as text. */
static int bincodes[256]=
{
   /* Control codes first, most of which are binary */
   /* 000-007: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 9 is a tab character, 10 is a line feed and 13 is carriage return */
   /* 008-015: */ 1, 0, 0, 1, 1, 0, 1, 1,
   /* 016-023: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* Idiotic DOS text files use CTRL-Z (26) to terminate ! */
   /* 024-031: */ 1, 1, 0, 1, 1, 1, 1, 1,
   /* We're now into text chars (32=space through to 126=tilde) */
   /* 032-039: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 040-047: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 048-055: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 056-063: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 064-071: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 072-079: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 080-087: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 088-095: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 096-103: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 104-111: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 112-119: */ 0, 0, 0, 0, 0, 0, 0, 0,
   /* 127 = delete */
   /* 120-127: */ 0, 0, 0, 0, 0, 0, 0, 1,
   /* 8-bit chars now - most are binary */
   /* 128 = Word space */
   /* 128-135: */ 0, 1, 1, 1, 1, 1, 1, 1,
   /* 136-143: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 145 and 146 = Word apostrophe, 148 and 150 = Word dash */
   /* 144-151: */ 1, 0, 0, 1, 0, 1, 0, 1,
   /* 152-159: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 163 = Pound sterling */
   /* 160-167: */ 1, 1, 1, 0, 1, 1, 1, 1,
   /* 168-175: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 178 and 179 = Word double quote */
   /* 176-183: */ 1, 1, 0, 0, 1, 1, 1, 1,
   /* 185 = Word apostrophe */
   /* 184-191: */ 1, 0, 1, 1, 1, 1, 1, 1,
   /* 192-199: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 200-207: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 208-215: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 216-223: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 226 = Word space */
   /* 224-231: */ 1, 1, 0, 1, 1, 1, 1, 1,
   /* 232-239: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 240-247: */ 1, 1, 1, 1, 1, 1, 1, 1,
   /* 248-255: */ 1, 1, 1, 1, 1, 1, 1, 1
};
65
66 #ifdef __STDC__
67 int is_binary(FILE *fhand)
68 #else
69 int is_binary(fhand)
70 FILE *fhand;
71 #endif
72 {
73 /* Given a freshly opened input file (with file pointer at start of
74 file), determine if any of the first X bytes [X = length of file or
75 256, whichever is the smaller] contain any binary codes. Return 1 if
76 they do, otherwise return 0. If there's an error, issue a warning and
77 return 0 for text.
78 */
79 int retval=0;
80 if (autodetect)
81 {
82 binchunkptr=alloc_mem(binchunkptr,&binchunksize,MAX_BIN_BYTES);
83 autobinsize=fread((void *)binchunkptr,1,MAX_BIN_BYTES,fhand);
84 if (ferror(fhand))
85 (void)fprintf(stderr,"WARNING: Input unreadable (assuming text data)\n");
86 else
87 if (autobinsize)
88 {
89 /* In theory, we could "rewind(fhand);" here to return the open
90 file pointer back to the start of the file. Sadly, however, it
91 doesn't work for stdin, so we have to re-use the binchunkptr
92 buffer when we read the file for real. This is easy for binary
93 reads (there's a start offset in the routine, so we set it to
94 autobinsize), but tricky for the text reads (fgets() has to be
95 simulated - see replace_fgets() in text.c) */
96 size_t rloop;
97 for (rloop=0;rloop<autobinsize && !retval;rloop++)
98 if (bincodes[(unsigned char)binchunkptr[rloop]]) retval=1;
99 }
100 }
101 else
102 {
103 retval=binary;
104 autobinsize=0;
105 }
106 if (retval) (void)strcpy(filetype,"binary file");
107 else (void)strcpy(filetype,"text file");
108 autobinread=0; /* No bytes read from auto-detect buffer yet */
109 return(retval);
110 }