"Fossies" - the Fresh Open Source Software Archive 
Member "ctok.c" (9 May 1995, 16712 Bytes) of package /linux/misc/old/cpost.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "ctok.c" see the
Fossies "Dox" file reference documentation.
1 /*------------------------------------------------------------------
2 * ctok : C language tokenizer
3 *------------------------------------------------------------------
4 * 10-01-91 Patrick J. Mueller
5 *------------------------------------------------------------------*/
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11
12 #include "ctok.h"
13
/*------------------------------------------------------------------
 * is a character a valid character in a C identifier?
 *
 * c is an int holding a character value or EOF.  It is evaluated
 * twice, so it must not have side effects.  The argument is
 * parenthesized so that an expression argument (for instance a
 * conditional expression) is compared against '_' as a whole.
 *------------------------------------------------------------------*/
#define isCsymbol(c) (isalnum(c) || ('_' == (c)))
18
19 /*------------------------------------------------------------------
20 * typedefs
21 *------------------------------------------------------------------*/
22 typedef struct
23 {
24 int eof;
25 char *buffer;
26 long bufferLen;
27 long bufferInd;
28 long fileOffs;
29 long line;
30 int unGetChar;
31 int unGetReady;
32 long tokOffs;
33 long tokLen;
34 CTokRead readFunc;
35 void *readInfo;
36 char ident[MAX_IDENT_LEN+1];
37 } CTokInfo;
38
39 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
40 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
41
42 /*------------------------------------------------------------------
43 * get next char from file
44 *------------------------------------------------------------------*/
45 void GetNextChar(
46 int *c,
47 CTokInfo *cti
48 )
49 {
50 cti->fileOffs++;
51
52 /*---------------------------------------------------------------
53 * check for end of file
54 *---------------------------------------------------------------*/
55 if (cti->eof)
56 {
57 *c = EOF;
58 return;
59 }
60
61 /*---------------------------------------------------------------
62 * check for a char in the unget holder
63 *---------------------------------------------------------------*/
64 if (cti->unGetReady)
65 {
66 cti->unGetReady = 0;
67 *c = cti->unGetChar;
68
69 if ('\n' == *c)
70 cti->line++;
71 return;
72 }
73
74 /*---------------------------------------------------------------
75 * see if we need to read another buffer
76 *---------------------------------------------------------------*/
77 if (cti->bufferInd == cti->bufferLen)
78 {
79 cti->bufferLen = cti->readFunc(cti->readInfo,&(cti->buffer));
80 cti->bufferInd = 0L;
81
82 if (0L == cti->bufferLen)
83 {
84 *c = EOF;
85 cti->eof = 1;
86 return;
87 }
88 }
89
90 /*---------------------------------------------------------------
91 * read character from buffer
92 *---------------------------------------------------------------*/
93 *c = cti->buffer[cti->bufferInd++];
94
95 if ('\n' == *c)
96 cti->line++;
97
98 return;
99 }
100
101 /*------------------------------------------------------------------
102 * put back last char from file
103 *------------------------------------------------------------------*/
104 void UnGetNextChar(
105 int c,
106 CTokInfo *cti
107 )
108 {
109 cti->fileOffs--;
110
111 cti->unGetChar = c;
112 cti->unGetReady = 1;
113
114 if ('\n' == c)
115 cti->line--;
116 }
117
118 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
119 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
120
121 /*------------------------------------------------------------------
122 * read a C character constant or string
123 *------------------------------------------------------------------*/
124 static void ReadString(
125 CTokInfo *cti,
126 int c
127 )
128 {
129 int stop;
130
131 /*---------------------------------------------------------------
132 * the character passed in is ' or ", and it is the character that
133 * signifies the end of the string
134 *---------------------------------------------------------------*/
135 stop = c;
136
137 /*---------------------------------------------------------------
138 * keep going until we hit our stop character
139 *---------------------------------------------------------------*/
140 GetNextChar(&c,cti);
141 while (stop != c)
142 {
143 /*------------------------------------------------------------
144 * for a \, inhale next character
145 *------------------------------------------------------------*/
146 if ('\\' == c)
147 GetNextChar(&c,cti);
148
149 /*------------------------------------------------------------
150 * for EOF, break
151 *------------------------------------------------------------*/
152 if (EOF == c)
153 break;
154
155 GetNextChar(&c,cti);
156 }
157
158 return;
159 }
160
161 /*------------------------------------------------------------------
162 * read a C comment
163 *------------------------------------------------------------------*/
164 static void ReadComment(
165 CTokInfo *cti
166 )
167 {
168 int c;
169
170 /*---------------------------------------------------------------
171 * loop until end of file (or return in middle)
172 *---------------------------------------------------------------*/
173 GetNextChar(&c,cti);
174 while (EOF != c)
175 {
176
177 /*------------------------------------------------------------
178 * if not *, just get next character
179 *------------------------------------------------------------*/
180 if ('*' != c)
181 GetNextChar(&c,cti);
182
183 /*------------------------------------------------------------
184 * got a * - see if next is /
185 *------------------------------------------------------------*/
186 else
187 {
188 /*---------------------------------------------------------
189 * if next is /, return
190 *---------------------------------------------------------*/
191 GetNextChar(&c,cti);
192 if ('/' == c)
193 return;
194 }
195
196 }
197
198 return;
199 }
200
201 /*------------------------------------------------------------------
202 * read a C++ style comment
203 *------------------------------------------------------------------*/
204 static void ReadCppComment(
205 CTokInfo *cti
206 )
207 {
208 int c;
209
210 /*---------------------------------------------------------------
211 * loop until end of line or end of file
212 *---------------------------------------------------------------*/
213 GetNextChar(&c,cti);
214
215 while ((EOF != c) && ('\n' != c))
216 GetNextChar(&c,cti);
217
218 UnGetNextChar(c,cti);
219 return;
220 }
221
222 /*------------------------------------------------------------------
223 * read an identifier
224 *------------------------------------------------------------------*/
225 static void ReadIdent(
226 CTokInfo *cti,
227 int c
228 )
229 {
230 int identLen;
231
232 /*---------------------------------------------------------------
233 * initialize length and stick first char in
234 *---------------------------------------------------------------*/
235 identLen = 0;
236 cti->ident[identLen++] = (char) c;
237
238 /*---------------------------------------------------------------
239 * while still a valid symbol character ...
240 *---------------------------------------------------------------*/
241 GetNextChar(&c,cti);
242 while (isCsymbol(c))
243 {
244 /*------------------------------------------------------------
245 * make sure we got enough room, then stick it in
246 *------------------------------------------------------------*/
247 if (identLen < MAX_IDENT_LEN)
248 cti->ident[identLen++] = (char) c;
249
250 GetNextChar(&c,cti);
251 }
252
253 /*---------------------------------------------------------------
254 * finish up identifier, put last character back
255 *---------------------------------------------------------------*/
256 cti->ident[identLen] = '\0';
257 UnGetNextChar(c,cti);
258 }
259
260 /*------------------------------------------------------------------
261 * read a number
262 *------------------------------------------------------------------*/
263 static void ReadNumber(
264 CTokInfo *cti,
265 int c
266 )
267 {
268
269 /*---------------------------------------------------------------
270 * while still a valid number character ...
271 *---------------------------------------------------------------*/
272 GetNextChar(&c,cti);
273 while (isalnum(c))
274 GetNextChar(&c,cti);
275
276 /*---------------------------------------------------------------
277 * put last character back
278 *---------------------------------------------------------------*/
279 UnGetNextChar(c,cti);
280 }
281
282 /*------------------------------------------------------------------
283 * read a preprocessor statement
284 *------------------------------------------------------------------*/
285 static void ReadPreprocessor(
286 CTokInfo *cti
287 )
288 {
289 int c;
290
291 /*---------------------------------------------------------------
292 * loop until end of file (or return in middle)
293 *---------------------------------------------------------------*/
294 GetNextChar(&c,cti);
295 while (EOF != c)
296 {
297 /*------------------------------------------------------------
298 * if we found a newline, leave
299 *------------------------------------------------------------*/
300 if ('\n' == c)
301 {
302 UnGetNextChar(c,cti);
303 return;
304 }
305
306 /*------------------------------------------------------------
307 * if we got anything but a \, eat it
308 *------------------------------------------------------------*/
309 else if ('\\' != c)
310 GetNextChar(&c,cti);
311
312 /*------------------------------------------------------------
313 * got a \ - see if next is \n
314 *------------------------------------------------------------*/
315 else
316 {
317 /*---------------------------------------------------------
318 * if next isn't \n, start at top of loop
319 *---------------------------------------------------------*/
320 GetNextChar(&c,cti);
321
322 /*---------------------------------------------------------
323 * skip over white space first
324 *---------------------------------------------------------*/
325 while (isspace(c) && ('\n' != c))
326 GetNextChar(&c,cti);
327
328 if ('\n' != c)
329 continue;
330
331 /*---------------------------------------------------------
332 * if it is a \n, read next char and continue
333 *---------------------------------------------------------*/
334 GetNextChar(&c,cti);
335 continue;
336 }
337
338 }
339
340 return;
341 }
342
343 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
344 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
345
346 /*------------------------------------------------------------------
347 * tokenizer
348 *------------------------------------------------------------------*/
349
350 static int GetToken(
351 CTokInfo *cti
352 )
353 {
354 int c;
355 int type;
356 unsigned long offsStart;
357
358 /*---------------------------------------------------------------
359 * read next character
360 *---------------------------------------------------------------*/
361 GetNextChar(&c,cti);
362
363 /*---------------------------------------------------------------
364 * skip white space
365 *---------------------------------------------------------------*/
366 while (isspace(c))
367 GetNextChar(&c,cti);
368
369 /*---------------------------------------------------------------
370 * save starting offset
371 *---------------------------------------------------------------*/
372 offsStart = cti->fileOffs;
373
374 /*---------------------------------------------------------------
375 * empty identifier
376 *---------------------------------------------------------------*/
377 memset(cti->ident,'\0',sizeof(cti->ident));
378
379 /*---------------------------------------------------------------
380 * big switch on it's value
381 *---------------------------------------------------------------*/
382 switch(c)
383 {
384 /*------------------------------------------------------------
385 * check for end of file
386 *------------------------------------------------------------*/
387 case EOF:
388 type = TOKEN_EOF;
389 break;
390
391 /*------------------------------------------------------------
392 * for pound sign, read preprocessor directive
393 *------------------------------------------------------------*/
394 case '#':
395 ReadPreprocessor(cti);
396 type = TOKEN_PREPROC;
397 break;
398
399 /*------------------------------------------------------------
400 * single or double quote
401 *------------------------------------------------------------*/
402 case '\'':
403 case '"':
404 ReadString(cti,c);
405 type = TOKEN_STRING;
406 break;
407
408 /*------------------------------------------------------------
409 * start of comment?
410 *------------------------------------------------------------*/
411 case '/':
412 /*---------------------------------------------------------
413 * get next char - if *, read to end of comment
414 *---------------------------------------------------------*/
415 GetNextChar(&c,cti);
416 if ('*' == c)
417 {
418 ReadComment(cti);
419 type = TOKEN_COMMENT;
420 }
421
422 /*---------------------------------------------------------
423 * see if it's a C++ style comment
424 *---------------------------------------------------------*/
425 else if ('/' == c)
426 {
427 ReadCppComment(cti);
428 type = TOKEN_COMMENT;
429 }
430
431 /*---------------------------------------------------------
432 * otherwise it's just a plain /
433 *---------------------------------------------------------*/
434 else
435 {
436 UnGetNextChar(c,cti);
437 type = TOKEN_OPER;
438 }
439
440 break;
441
442 /*------------------------------------------------------------
443 * everything else - identifiers and punctuation
444 *------------------------------------------------------------*/
445 default:
446 if (isCsymbol(c) && !isdigit(c))
447 {
448 ReadIdent(cti,c);
449 type = TOKEN_IDENT;
450 }
451
452 else if (isdigit(c))
453 {
454 ReadNumber(cti,c);
455 type = TOKEN_NUMBER;
456 }
457
458 /*---------------------------------------------------------
459 * anything else
460 *---------------------------------------------------------*/
461 else
462 {
463 type = TOKEN_OPER;
464 cti->ident[0] = (char) c;
465 }
466
467 break;
468 }
469
470 cti->tokOffs = offsStart;
471 cti->tokLen = cti->fileOffs - offsStart + 1;
472 return(type);
473 }
474
475 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
476 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
477
478
479 /*------------------------------------------------------------------
480 * Initializer
481 *------------------------------------------------------------------*/
482 void *CTokInit(
483 CTokRead readFunc,
484 void *readInfo
485 )
486 {
487 CTokInfo *cti;
488
489 /*---------------------------------------------------------------
490 * allocate space for structure
491 *---------------------------------------------------------------*/
492 cti = malloc(sizeof(CTokInfo));
493 if (NULL == cti)
494 return NULL;
495
496 /*---------------------------------------------------------------
497 * initialize structure
498 *---------------------------------------------------------------*/
499 cti->eof = 0;
500 cti->buffer = NULL;
501 cti->bufferLen = 0L;
502 cti->bufferInd = 0L;
503 cti->fileOffs = -1L;
504 cti->line = 1;
505 cti->unGetChar = '\0';
506 cti->unGetReady = 0;
507 cti->tokOffs = 0L;
508 cti->tokLen = 0L;
509 cti->readFunc = readFunc;
510 cti->readInfo = readInfo;
511 memset(cti->ident,'\0',sizeof(cti->ident));
512
513 return cti;
514 }
515
516 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
517 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
518
/*------------------------------------------------------------------
 * Terminator
 *
 * handle - value previously returned by CTokInit(); the memory it
 *          refers to is released and the handle must not be used
 *          afterwards
 *------------------------------------------------------------------*/
void CTokTerm(void *handle)
{
   free(handle);
}
528
529 /*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
530 /*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
531
532 /*------------------------------------------------------------------
533 * Tokenizer
534 *------------------------------------------------------------------*/
535 void CTokGet(
536 void *handle,
537 Token *token
538 )
539 {
540 CTokInfo *cti;
541
542 cti = handle;
543
544 token->type = GetToken(cti);
545 token->offs = cti->tokOffs;
546 token->len = cti->tokLen;
547 token->ident = cti->ident;
548 token->line = cti->line;
549 }