"Fossies" - the Fresh Open Source Software Archive 
Member "ragel-6.10/ragel/parsedata.cpp" (24 Mar 2017, 42382 Bytes) of package /linux/misc/ragel-6.10.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "parsedata.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
6.9_vs_6.10.
1 /*
2 * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
3 */
4
5 /* This file is part of Ragel.
6 *
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22 #include <iostream>
23 #include <iomanip>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <limits.h>
27
28 #include "ragel.h"
29 #include "rlparse.h"
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
34 #include "version.h"
35 #include "inputdata.h"
36
37 using namespace std;
38
39 char mainMachine[] = "main";
40
41 void Token::set( const char *str, int len )
42 {
43 length = len;
44 data = new char[len+1];
45 memcpy( data, str, len );
46 data[len] = 0;
47 }
48
49 void Token::append( const Token &other )
50 {
51 int newLength = length + other.length;
52 char *newString = new char[newLength+1];
53 memcpy( newString, data, length );
54 memcpy( newString + length, other.data, other.length );
55 newString[newLength] = 0;
56 data = newString;
57 length = newLength;
58 }
59
60 /* Perform minimization after an operation according
61 * to the command line args. */
62 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
63 {
64 /* Switch on the prefered minimization algorithm. */
65 if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
66 /* First clean up the graph. FsmAp operations may leave these
67 * lying around. There should be no dead end states. The subtract
68 * intersection operators are the only places where they may be
69 * created and those operators clean them up. */
70 fsm->removeUnreachableStates();
71
72 switch ( minimizeLevel ) {
73 case MinimizeApprox:
74 fsm->minimizeApproximate();
75 break;
76 case MinimizePartition1:
77 fsm->minimizePartition1();
78 break;
79 case MinimizePartition2:
80 fsm->minimizePartition2();
81 break;
82 case MinimizeStable:
83 fsm->minimizeStable();
84 break;
85 }
86 }
87 }
88
89 /* Count the transitions in the fsm by walking the state list. */
90 int countTransitions( FsmAp *fsm )
91 {
92 int numTrans = 0;
93 StateAp *state = fsm->stateList.head;
94 while ( state != 0 ) {
95 numTrans += state->outList.length();
96 state = state->next;
97 }
98 return numTrans;
99 }
100
101 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
102 {
103 /* Reset errno so we can check for overflow or underflow. In the event of
104 * an error, sets the return val to the upper or lower bound being tested
105 * against. */
106 errno = 0;
107 unsigned int size = keyOps->alphType->size;
108 bool unusedBits = size < sizeof(unsigned long);
109
110 unsigned long ul = strtoul( str, 0, 16 );
111
112 if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
113 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
114 ul = 1 << (size * 8);
115 }
116
117 if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
118 ul |= ( -1L >> (size*8) ) << (size*8);
119
120 return Key( (long)ul );
121 }
122
123 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
124 {
125 if ( keyOps->alphType->isSigned ) {
126 /* Convert the number to a decimal. First reset errno so we can check
127 * for overflow or underflow. */
128 errno = 0;
129 long long minVal = keyOps->alphType->sMinVal;
130 long long maxVal = keyOps->alphType->sMaxVal;
131
132 long long ll = strtoll( str, 0, 10 );
133
134 /* Check for underflow. */
135 if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
136 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
137 ll = minVal;
138 }
139 /* Check for overflow. */
140 else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
141 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
142 ll = maxVal;
143 }
144
145 return Key( (long)ll );
146 }
147 else {
148 /* Convert the number to a decimal. First reset errno so we can check
149 * for overflow or underflow. */
150 errno = 0;
151 unsigned long long minVal = keyOps->alphType->uMinVal;
152 unsigned long long maxVal = keyOps->alphType->uMaxVal;
153
154 unsigned long long ull = strtoull( str, 0, 10 );
155
156 /* Check for underflow. */
157 if ( ( errno == ERANGE && ull < 0 ) || ull < minVal) {
158 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
159 ull = minVal;
160 }
161 /* Check for overflow. */
162 else if ( ( errno == ERANGE && ull > 0 ) || ull > maxVal ) {
163 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
164 ull = maxVal;
165 }
166
167 return Key( (unsigned long)ull );
168 }
169 }
170
171 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
172 * number returned by the parser. Validates that the number doesn't overflow
173 * the alphabet type. */
174 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
175 {
176 /* Switch on hex/decimal format. */
177 if ( str[0] == '0' && str[1] == 'x' )
178 return makeFsmKeyHex( str, loc, pd );
179 else
180 return makeFsmKeyDec( str, loc, pd );
181 }
182
183 /* Make an fsm int format (what the fsm graph uses) from a single character.
184 * Performs proper conversion depending on signed/unsigned property of the
185 * alphabet. */
186 Key makeFsmKeyChar( char c, ParseData *pd )
187 {
188 if ( keyOps->isSigned ) {
189 /* Copy from a char type. */
190 return Key( c );
191 }
192 else {
193 /* Copy from an unsigned byte type. */
194 return Key( (unsigned char)c );
195 }
196 }
197
198 /* Make an fsm key array in int format (what the fsm graph uses) from a string
199 * of characters. Performs proper conversion depending on signed/unsigned
200 * property of the alphabet. */
201 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
202 {
203 if ( keyOps->isSigned ) {
204 /* Copy from a char star type. */
205 char *src = data;
206 for ( int i = 0; i < len; i++ )
207 result[i] = Key(src[i]);
208 }
209 else {
210 /* Copy from an unsigned byte ptr type. */
211 unsigned char *src = (unsigned char*) data;
212 for ( int i = 0; i < len; i++ )
213 result[i] = Key(src[i]);
214 }
215 }
216
217 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
218 * will be changed. */
219 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
220 bool caseInsensitive, ParseData *pd )
221 {
222 /* Use a transitions list for getting unique keys. */
223 if ( keyOps->isSigned ) {
224 /* Copy from a char star type. */
225 char *src = data;
226 for ( int si = 0; si < len; si++ ) {
227 Key key( src[si] );
228 result.insert( key );
229 if ( caseInsensitive ) {
230 if ( key.isLower() )
231 result.insert( key.toUpper() );
232 else if ( key.isUpper() )
233 result.insert( key.toLower() );
234 }
235 }
236 }
237 else {
238 /* Copy from an unsigned byte ptr type. */
239 unsigned char *src = (unsigned char*) data;
240 for ( int si = 0; si < len; si++ ) {
241 Key key( src[si] );
242 result.insert( key );
243 if ( caseInsensitive ) {
244 if ( key.isLower() )
245 result.insert( key.toUpper() );
246 else if ( key.isUpper() )
247 result.insert( key.toLower() );
248 }
249 }
250 }
251 }
252
253 FsmAp *dotFsm( ParseData *pd )
254 {
255 FsmAp *retFsm = new FsmAp();
256 retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
257 return retFsm;
258 }
259
260 FsmAp *dotStarFsm( ParseData *pd )
261 {
262 FsmAp *retFsm = new FsmAp();
263 retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
264 return retFsm;
265 }
266
267 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
268 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
269 {
270 /* FsmAp created to return. */
271 FsmAp *retFsm = 0;
272 bool isSigned = keyOps->isSigned;
273
274 switch ( builtin ) {
275 case BT_Any: {
276 /* All characters. */
277 retFsm = dotFsm( pd );
278 break;
279 }
280 case BT_Ascii: {
281 /* Ascii characters 0 to 127. */
282 retFsm = new FsmAp();
283 retFsm->rangeFsm( 0, 127 );
284 break;
285 }
286 case BT_Extend: {
287 /* Ascii extended characters. This is the full byte range. Dependent
288 * on signed, vs no signed. If the alphabet is one byte then just use
289 * dot fsm. */
290 if ( isSigned ) {
291 retFsm = new FsmAp();
292 retFsm->rangeFsm( -128, 127 );
293 }
294 else {
295 retFsm = new FsmAp();
296 retFsm->rangeFsm( 0, 255 );
297 }
298 break;
299 }
300 case BT_Alpha: {
301 /* Alpha [A-Za-z]. */
302 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
303 upper->rangeFsm( 'A', 'Z' );
304 lower->rangeFsm( 'a', 'z' );
305 upper->unionOp( lower );
306 upper->minimizePartition2();
307 retFsm = upper;
308 break;
309 }
310 case BT_Digit: {
311 /* Digits [0-9]. */
312 retFsm = new FsmAp();
313 retFsm->rangeFsm( '0', '9' );
314 break;
315 }
316 case BT_Alnum: {
317 /* Alpha numerics [0-9A-Za-z]. */
318 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
319 FsmAp *upper = new FsmAp();
320 digit->rangeFsm( '0', '9' );
321 upper->rangeFsm( 'A', 'Z' );
322 lower->rangeFsm( 'a', 'z' );
323 digit->unionOp( upper );
324 digit->unionOp( lower );
325 digit->minimizePartition2();
326 retFsm = digit;
327 break;
328 }
329 case BT_Lower: {
330 /* Lower case characters. */
331 retFsm = new FsmAp();
332 retFsm->rangeFsm( 'a', 'z' );
333 break;
334 }
335 case BT_Upper: {
336 /* Upper case characters. */
337 retFsm = new FsmAp();
338 retFsm->rangeFsm( 'A', 'Z' );
339 break;
340 }
341 case BT_Cntrl: {
342 /* Control characters. */
343 FsmAp *cntrl = new FsmAp();
344 FsmAp *highChar = new FsmAp();
345 cntrl->rangeFsm( 0, 31 );
346 highChar->concatFsm( 127 );
347 cntrl->unionOp( highChar );
348 cntrl->minimizePartition2();
349 retFsm = cntrl;
350 break;
351 }
352 case BT_Graph: {
353 /* Graphical ascii characters [!-~]. */
354 retFsm = new FsmAp();
355 retFsm->rangeFsm( '!', '~' );
356 break;
357 }
358 case BT_Print: {
359 /* Printable characters. Same as graph except includes space. */
360 retFsm = new FsmAp();
361 retFsm->rangeFsm( ' ', '~' );
362 break;
363 }
364 case BT_Punct: {
365 /* Punctuation. */
366 FsmAp *range1 = new FsmAp();
367 FsmAp *range2 = new FsmAp();
368 FsmAp *range3 = new FsmAp();
369 FsmAp *range4 = new FsmAp();
370 range1->rangeFsm( '!', '/' );
371 range2->rangeFsm( ':', '@' );
372 range3->rangeFsm( '[', '`' );
373 range4->rangeFsm( '{', '~' );
374 range1->unionOp( range2 );
375 range1->unionOp( range3 );
376 range1->unionOp( range4 );
377 range1->minimizePartition2();
378 retFsm = range1;
379 break;
380 }
381 case BT_Space: {
382 /* Whitespace: [\t\v\f\n\r ]. */
383 FsmAp *cntrl = new FsmAp();
384 FsmAp *space = new FsmAp();
385 cntrl->rangeFsm( '\t', '\r' );
386 space->concatFsm( ' ' );
387 cntrl->unionOp( space );
388 cntrl->minimizePartition2();
389 retFsm = cntrl;
390 break;
391 }
392 case BT_Xdigit: {
393 /* Hex digits [0-9A-Fa-f]. */
394 FsmAp *digit = new FsmAp();
395 FsmAp *upper = new FsmAp();
396 FsmAp *lower = new FsmAp();
397 digit->rangeFsm( '0', '9' );
398 upper->rangeFsm( 'A', 'F' );
399 lower->rangeFsm( 'a', 'f' );
400 digit->unionOp( upper );
401 digit->unionOp( lower );
402 digit->minimizePartition2();
403 retFsm = digit;
404 break;
405 }
406 case BT_Lambda: {
407 retFsm = new FsmAp();
408 retFsm->lambdaFsm();
409 break;
410 }
411 case BT_Empty: {
412 retFsm = new FsmAp();
413 retFsm->emptyFsm();
414 break;
415 }}
416
417 return retFsm;
418 }
419
420 /* Check if this name inst or any name inst below is referenced. */
421 bool NameInst::anyRefsRec()
422 {
423 if ( numRefs > 0 )
424 return true;
425
426 /* Recurse on children until true. */
427 for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
428 if ( (*ch)->anyRefsRec() )
429 return true;
430 }
431
432 return false;
433 }
434
435 /*
436 * ParseData
437 */
438
439 /* Initialize the structure that will collect info during the parse of a
440 * machine. */
441 ParseData::ParseData( const char *fileName, char *sectionName,
442 const InputLoc §ionLoc )
443 :
444 sectionGraph(0),
445 generatingSectionSubset(false),
446 nextPriorKey(0),
447 /* 0 is reserved for global error actions. */
448 nextLocalErrKey(1),
449 nextNameId(0),
450 nextCondId(0),
451 alphTypeSet(false),
452 getKeyExpr(0),
453 accessExpr(0),
454 prePushExpr(0),
455 postPopExpr(0),
456 pExpr(0),
457 peExpr(0),
458 eofExpr(0),
459 csExpr(0),
460 topExpr(0),
461 stackExpr(0),
462 actExpr(0),
463 tokstartExpr(0),
464 tokendExpr(0),
465 dataExpr(0),
466 lowerNum(0),
467 upperNum(0),
468 fileName(fileName),
469 sectionName(sectionName),
470 sectionLoc(sectionLoc),
471 curActionOrd(0),
472 curPriorOrd(0),
473 rootName(0),
474 exportsRootName(0),
475 nextEpsilonResolvedLink(0),
476 nextLongestMatchId(1),
477 lmRequiresErrorState(false),
478 cgd(0)
479 {
480 /* Initialize the dictionary of graphs. This is our symbol table. The
481 * initialization needs to be done on construction which happens at the
482 * beginning of a machine spec so any assignment operators can reference
483 * the builtins. */
484 initGraphDict();
485 }
486
487 /* Clean up the data collected during a parse. */
488 ParseData::~ParseData()
489 {
490 /* Delete all the nodes in the action list. Will cause all the
491 * string data that represents the actions to be deallocated. */
492 actionList.empty();
493 }
494
495 /* Make a name id in the current name instantiation scope if it is not
496 * already there. */
497 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
498 {
499 /* Create the name instantitaion object and insert it. */
500 NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
501 curNameInst->childVect.append( newNameInst );
502 if ( data != 0 )
503 curNameInst->children.insertMulti( data, newNameInst );
504 return newNameInst;
505 }
506
507 void ParseData::initNameWalk()
508 {
509 curNameInst = rootName;
510 curNameChild = 0;
511 }
512
513 void ParseData::initExportsNameWalk()
514 {
515 curNameInst = exportsRootName;
516 curNameChild = 0;
517 }
518
519 /* Goes into the next child scope. The number of the child is already set up.
520 * We need this for the syncronous name tree and parse tree walk to work
521 * properly. It is reset on entry into a scope and advanced on poping of a
522 * scope. A call to enterNameScope should be accompanied by a corresponding
523 * popNameScope. */
524 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
525 {
526 /* Save off the current data. */
527 NameFrame retFrame;
528 retFrame.prevNameInst = curNameInst;
529 retFrame.prevNameChild = curNameChild;
530 retFrame.prevLocalScope = localNameScope;
531
532 /* Enter into the new name scope. */
533 for ( int i = 0; i < numScopes; i++ ) {
534 curNameInst = curNameInst->childVect[curNameChild];
535 curNameChild = 0;
536 }
537
538 if ( isLocal )
539 localNameScope = curNameInst;
540
541 return retFrame;
542 }
543
544 /* Return from a child scope to a parent. The parent info must be specified as
545 * an argument and is obtained from the corresponding call to enterNameScope.
546 * */
547 void ParseData::popNameScope( const NameFrame &frame )
548 {
549 /* Pop the name scope. */
550 curNameInst = frame.prevNameInst;
551 curNameChild = frame.prevNameChild+1;
552 localNameScope = frame.prevLocalScope;
553 }
554
555 void ParseData::resetNameScope( const NameFrame &frame )
556 {
557 /* Pop the name scope. */
558 curNameInst = frame.prevNameInst;
559 curNameChild = frame.prevNameChild;
560 localNameScope = frame.prevLocalScope;
561 }
562
563
564 void ParseData::unsetObsoleteEntries( FsmAp *graph )
565 {
566 /* Loop the reference names and increment the usage. Names that are no
567 * longer needed will be unset in graph. */
568 for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
569 /* Get the name. */
570 NameInst *name = *ref;
571 name->numUses += 1;
572
573 /* If the name is no longer needed unset its corresponding entry. */
574 if ( name->numUses == name->numRefs ) {
575 assert( graph->entryPoints.find( name->id ) != 0 );
576 graph->unsetEntry( name->id );
577 assert( graph->entryPoints.find( name->id ) == 0 );
578 }
579 }
580 }
581
582 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
583 {
584 /* Queue needed for breadth-first search, load it with the start node. */
585 NameInstList nameQueue;
586 nameQueue.append( refFrom );
587
588 NameSet result;
589 while ( nameQueue.length() > 0 ) {
590 /* Pull the next from location off the queue. */
591 NameInst *from = nameQueue.detachFirst();
592
593 /* Look for the name. */
594 NameMapEl *low, *high;
595 if ( from->children.findMulti( data, low, high ) ) {
596 /* Record all instances of the name. */
597 for ( ; low <= high; low++ )
598 result.insert( low->value );
599 }
600
601 /* Name not there, do breadth-first operation of appending all
602 * childrent to the processing queue. */
603 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
604 if ( !recLabelsOnly || (*name)->isLabel )
605 nameQueue.append( *name );
606 }
607 }
608
609 /* Queue exhausted and name never found. */
610 return result;
611 }
612
613 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
614 const NameRef &nameRef, int namePos )
615 {
616 /* Look for the name in the owning scope of the factor with aug. */
617 NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
618
619 /* If there are more parts to the name then continue on. */
620 if ( ++namePos < nameRef.length() ) {
621 /* There are more components to the name, search using all the part
622 * results as the base. */
623 for ( NameSet::Iter name = partResult; name.lte(); name++ )
624 resolveFrom( result, *name, nameRef, namePos );
625 }
626 else {
627 /* This is the last component, append the part results to the final
628 * results. */
629 result.insert( partResult );
630 }
631 }
632
633 /* Write out a name reference. */
634 ostream &operator<<( ostream &out, const NameRef &nameRef )
635 {
636 int pos = 0;
637 if ( nameRef[pos] == 0 ) {
638 out << "::";
639 pos += 1;
640 }
641 out << nameRef[pos++];
642 for ( ; pos < nameRef.length(); pos++ )
643 out << "::" << nameRef[pos];
644 return out;
645 }
646
647 ostream &operator<<( ostream &out, const NameInst &nameInst )
648 {
649 /* Count the number fully qualified name parts. */
650 int numParents = 0;
651 NameInst *curParent = nameInst.parent;
652 while ( curParent != 0 ) {
653 numParents += 1;
654 curParent = curParent->parent;
655 }
656
657 /* Make an array and fill it in. */
658 curParent = nameInst.parent;
659 NameInst **parents = new NameInst*[numParents];
660 for ( int p = numParents-1; p >= 0; p-- ) {
661 parents[p] = curParent;
662 curParent = curParent->parent;
663 }
664
665 /* Write the parents out, skip the root. */
666 for ( int p = 1; p < numParents; p++ )
667 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
668
669 /* Write the name and cleanup. */
670 out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
671 delete[] parents;
672 return out;
673 }
674
675 struct CmpNameInstLoc
676 {
677 static int compare( const NameInst *ni1, const NameInst *ni2 )
678 {
679 if ( ni1->loc.line < ni2->loc.line )
680 return -1;
681 else if ( ni1->loc.line > ni2->loc.line )
682 return 1;
683 else if ( ni1->loc.col < ni2->loc.col )
684 return -1;
685 else if ( ni1->loc.col > ni2->loc.col )
686 return 1;
687 return 0;
688 }
689 };
690
691 void errorStateLabels( const NameSet &resolved )
692 {
693 MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
694 mergeSort.sort( resolved.data, resolved.length() );
695 for ( NameSet::Iter res = resolved; res.lte(); res++ )
696 error((*res)->loc) << " -> " << **res << endl;
697 }
698
699
700 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
701 {
702 NameInst *nameInst = 0;
703
704 /* Do the local search if the name is not strictly a root level name
705 * search. */
706 if ( nameRef[0] != 0 ) {
707 /* If the action is referenced, resolve all of them. */
708 if ( action != 0 && action->actionRefs.length() > 0 ) {
709 /* Look for the name in all referencing scopes. */
710 NameSet resolved;
711 for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
712 resolveFrom( resolved, *actRef, nameRef, 0 );
713
714 if ( resolved.length() > 0 ) {
715 /* Take the first one. */
716 nameInst = resolved[0];
717 if ( resolved.length() > 1 ) {
718 /* Complain about the multiple references. */
719 error(loc) << "state reference " << nameRef <<
720 " resolves to multiple entry points" << endl;
721 errorStateLabels( resolved );
722 }
723 }
724 }
725 }
726
727 /* If not found in the local scope, look in global. */
728 if ( nameInst == 0 ) {
729 NameSet resolved;
730 int fromPos = nameRef[0] != 0 ? 0 : 1;
731 resolveFrom( resolved, rootName, nameRef, fromPos );
732
733 if ( resolved.length() > 0 ) {
734 /* Take the first. */
735 nameInst = resolved[0];
736 if ( resolved.length() > 1 ) {
737 /* Complain about the multiple references. */
738 error(loc) << "state reference " << nameRef <<
739 " resolves to multiple entry points" << endl;
740 errorStateLabels( resolved );
741 }
742 }
743 }
744
745 if ( nameInst == 0 ) {
746 /* If not found then complain. */
747 error(loc) << "could not resolve state reference " << nameRef << endl;
748 }
749 return nameInst;
750 }
751
752 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
753 {
754 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
755 switch ( item->type ) {
756 case InlineItem::Entry: case InlineItem::Goto:
757 case InlineItem::Call: case InlineItem::Next: {
758 /* Resolve, pass action for local search. */
759 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
760
761 /* Name lookup error reporting is handled by resolveStateRef. */
762 if ( target != 0 ) {
763 /* Check if the target goes into a longest match. */
764 NameInst *search = target->parent;
765 while ( search != 0 ) {
766 if ( search->isLongestMatch ) {
767 error(item->loc) << "cannot enter inside a longest "
768 "match construction as an entry point" << endl;
769 break;
770 }
771 search = search->parent;
772 }
773
774 /* Record the reference in the name. This will cause the
775 * entry point to survive to the end of the graph
776 * generating walk. */
777 target->numRefs += 1;
778 }
779
780 item->nameTarg = target;
781 break;
782 }
783 default:
784 break;
785 }
786
787 /* Some of the item types may have children. */
788 if ( item->children != 0 )
789 resolveNameRefs( item->children, action );
790 }
791 }
792
793 /* Resolve references to labels in actions. */
794 void ParseData::resolveActionNameRefs()
795 {
796 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
797 /* Only care about the actions that are referenced. */
798 if ( act->actionRefs.length() > 0 )
799 resolveNameRefs( act->inlineList, act );
800 }
801 }
802
803 /* Walk a name tree starting at from and fill the name index. */
804 void ParseData::fillNameIndex( NameInst *from )
805 {
806 /* Fill the value for from in the name index. */
807 nameIndex[from->id] = from;
808
809 /* Recurse on the implicit final state and then all children. */
810 if ( from->final != 0 )
811 fillNameIndex( from->final );
812 for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
813 fillNameIndex( *name );
814 }
815
816 void ParseData::makeRootNames()
817 {
818 /* Create the root name. */
819 rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
820 exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
821 }
822
823 /* Build the name tree and supporting data structures. */
824 void ParseData::makeNameTree( GraphDictEl *dictEl )
825 {
826 /* Set up curNameInst for the walk. */
827 initNameWalk();
828
829 if ( dictEl != 0 ) {
830 /* A start location has been specified. */
831 dictEl->value->makeNameTree( dictEl->loc, this );
832 }
833 else {
834 /* First make the name tree. */
835 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
836 /* Recurse on the instance. */
837 glel->value->makeNameTree( glel->loc, this );
838 }
839 }
840
841 /* The number of nodes in the tree can now be given by nextNameId */
842 nameIndex = new NameInst*[nextNameId];
843 memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
844 fillNameIndex( rootName );
845 fillNameIndex( exportsRootName );
846 }
847
848
849 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
850 {
851 Expression *expression = new Expression( builtin );
852 Join *join = new Join( expression );
853 MachineDef *machineDef = new MachineDef( join );
854 VarDef *varDef = new VarDef( name, machineDef );
855 GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
856 graphDict.insert( graphDictEl );
857 }
858
859 /* Initialize the graph dict with builtin types. */
860 void ParseData::initGraphDict( )
861 {
862 createBuiltin( "any", BT_Any );
863 createBuiltin( "ascii", BT_Ascii );
864 createBuiltin( "extend", BT_Extend );
865 createBuiltin( "alpha", BT_Alpha );
866 createBuiltin( "digit", BT_Digit );
867 createBuiltin( "alnum", BT_Alnum );
868 createBuiltin( "lower", BT_Lower );
869 createBuiltin( "upper", BT_Upper );
870 createBuiltin( "cntrl", BT_Cntrl );
871 createBuiltin( "graph", BT_Graph );
872 createBuiltin( "print", BT_Print );
873 createBuiltin( "punct", BT_Punct );
874 createBuiltin( "space", BT_Space );
875 createBuiltin( "xdigit", BT_Xdigit );
876 createBuiltin( "null", BT_Lambda );
877 createBuiltin( "zlen", BT_Lambda );
878 createBuiltin( "empty", BT_Empty );
879 }
880
881 /* Set the alphabet type. If the types are not valid returns false. */
882 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
883 {
884 alphTypeLoc = loc;
885 userAlphType = findAlphType( s1, s2 );
886 alphTypeSet = true;
887 return userAlphType != 0;
888 }
889
890 /* Set the alphabet type. If the types are not valid returns false. */
891 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
892 {
893 alphTypeLoc = loc;
894 userAlphType = findAlphType( s1 );
895 alphTypeSet = true;
896 return userAlphType != 0;
897 }
898
899 bool ParseData::setVariable( char *var, InlineList *inlineList )
900 {
901 bool set = true;
902
903 if ( strcmp( var, "p" ) == 0 )
904 pExpr = inlineList;
905 else if ( strcmp( var, "pe" ) == 0 )
906 peExpr = inlineList;
907 else if ( strcmp( var, "eof" ) == 0 )
908 eofExpr = inlineList;
909 else if ( strcmp( var, "cs" ) == 0 )
910 csExpr = inlineList;
911 else if ( strcmp( var, "data" ) == 0 )
912 dataExpr = inlineList;
913 else if ( strcmp( var, "top" ) == 0 )
914 topExpr = inlineList;
915 else if ( strcmp( var, "stack" ) == 0 )
916 stackExpr = inlineList;
917 else if ( strcmp( var, "act" ) == 0 )
918 actExpr = inlineList;
919 else if ( strcmp( var, "ts" ) == 0 )
920 tokstartExpr = inlineList;
921 else if ( strcmp( var, "te" ) == 0 )
922 tokendExpr = inlineList;
923 else
924 set = false;
925
926 return set;
927 }
928
929 /* Initialize the key operators object that will be referenced by all fsms
930 * created. */
931 void ParseData::initKeyOps( )
932 {
933 /* Signedness and bounds. */
934 HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
935 thisKeyOps.setAlphType( alphType );
936
937 if ( lowerNum != 0 ) {
938 /* If ranges are given then interpret the alphabet type. */
939 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
940 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
941 }
942
943 thisCondData.lastCondKey = thisKeyOps.maxKey;
944 }
945
946 void ParseData::printNameInst( NameInst *nameInst, int level )
947 {
948 for ( int i = 0; i < level; i++ )
949 cerr << " ";
950 cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
951 " id: " << nameInst->id <<
952 " refs: " << nameInst->numRefs <<
953 " uses: " << nameInst->numUses << endl;
954 for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
955 printNameInst( *name, level+1 );
956 }
957
958 /* Remove duplicates of unique actions from an action table. */
959 void ParseData::removeDups( ActionTable &table )
960 {
961 /* Scan through the table looking for unique actions to
962 * remove duplicates of. */
963 for ( int i = 0; i < table.length(); i++ ) {
964 /* Remove any duplicates ahead of i. */
965 for ( int r = i+1; r < table.length(); ) {
966 if ( table[r].value == table[i].value )
967 table.vremove(r);
968 else
969 r += 1;
970 }
971 }
972 }
973
974 /* Remove duplicates from action lists. This operates only on transition and
975 * eof action lists and so should be called once all actions have been
976 * transfered to their final resting place. */
977 void ParseData::removeActionDups( FsmAp *graph )
978 {
979 /* Loop all states. */
980 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
981 /* Loop all transitions. */
982 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
983 removeDups( trans->actionTable );
984 removeDups( state->toStateActionTable );
985 removeDups( state->fromStateActionTable );
986 removeDups( state->eofActionTable );
987 }
988 }
989
990 Action *ParseData::newAction( const char *name, InlineList *inlineList )
991 {
992 InputLoc loc;
993 loc.line = 1;
994 loc.col = 1;
995 loc.fileName = "NONE";
996
997 Action *action = new Action( loc, name, inlineList, nextCondId++ );
998 action->actionRefs.append( rootName );
999 actionList.append( action );
1000 return action;
1001 }
1002
1003 void ParseData::initLongestMatchData()
1004 {
1005 if ( lmList.length() > 0 ) {
1006 /* The initTokStart action resets the token start. */
1007 InlineList *il1 = new InlineList;
1008 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
1009 initTokStart = newAction( "initts", il1 );
1010 initTokStart->isLmAction = true;
1011
1012 /* The initActId action gives act a default value. */
1013 InlineList *il4 = new InlineList;
1014 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
1015 initActId = newAction( "initact", il4 );
1016 initActId->isLmAction = true;
1017
1018 /* The setTokStart action sets tokstart. */
1019 InlineList *il5 = new InlineList;
1020 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1021 setTokStart = newAction( "ts", il5 );
1022 setTokStart->isLmAction = true;
1023
1024 /* The setTokEnd action sets tokend. */
1025 InlineList *il3 = new InlineList;
1026 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1027 setTokEnd = newAction( "te", il3 );
1028 setTokEnd->isLmAction = true;
1029
1030 /* The action will also need an ordering: ahead of all user action
1031 * embeddings. */
1032 initTokStartOrd = curActionOrd++;
1033 initActIdOrd = curActionOrd++;
1034 setTokStartOrd = curActionOrd++;
1035 setTokEndOrd = curActionOrd++;
1036 }
1037 }
1038
1039 /* After building the graph, do some extra processing to ensure the runtime
1040 * data of the longest mactch operators is consistent. */
1041 void ParseData::setLongestMatchData( FsmAp *graph )
1042 {
1043 if ( lmList.length() > 0 ) {
1044 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1045 * init the tokstart. */
1046 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1047 /* This is run after duplicates are removed, we must guard against
1048 * inserting a duplicate. */
1049 ActionTable &actionTable = en->value->toStateActionTable;
1050 if ( ! actionTable.hasAction( initTokStart ) )
1051 actionTable.setAction( initTokStartOrd, initTokStart );
1052 }
1053
1054 /* Find the set of states that are the target of transitions with
1055 * actions that have calls. These states will be targeted by fret
1056 * statements. */
1057 StateSet states;
1058 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1059 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1060 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1061 if ( ati->value->anyCall && trans->toState != 0 )
1062 states.insert( trans->toState );
1063 }
1064 }
1065 }
1066
1067
1068 /* Init tokstart upon entering the above collected states. */
1069 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1070 /* This is run after duplicates are removed, we must guard against
1071 * inserting a duplicate. */
1072 ActionTable &actionTable = (*ps)->toStateActionTable;
1073 if ( ! actionTable.hasAction( initTokStart ) )
1074 actionTable.setAction( initTokStartOrd, initTokStart );
1075 }
1076 }
1077 }
1078
1079 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1080 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1081 {
1082 /* Build the graph from a walk of the parse tree. */
1083 FsmAp *graph = gdNode->value->walk( this );
1084
1085 /* Resolve any labels that point to multiple states. Any labels that are
1086 * still around are referenced only by gotos and calls and they need to be
1087 * made into deterministic entry points. */
1088 graph->deterministicEntry();
1089
1090 /*
1091 * All state construction is now complete.
1092 */
1093
1094 /* Transfer actions from the out action tables to eof action tables. */
1095 for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1096 graph->transferOutActions( *state );
1097
1098 /* Transfer global error actions. */
1099 for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1100 graph->transferErrorActions( state, 0 );
1101
1102 if ( ::wantDupsRemoved )
1103 removeActionDups( graph );
1104
1105 /* Remove unreachable states. There should be no dead end states. The
1106 * subtract and intersection operators are the only places where they may
1107 * be created and those operators clean them up. */
1108 graph->removeUnreachableStates();
1109
1110 /* No more fsm operations are to be done. Action ordering numbers are
1111 * no longer of use and will just hinder minimization. Clear them. */
1112 graph->nullActionKeys();
1113
1114 /* Transition priorities are no longer of use. We can clear them
1115 * because they will just hinder minimization as well. Clear them. */
1116 graph->clearAllPriorities();
1117
1118 if ( minimizeOpt != MinimizeNone ) {
1119 /* Minimize here even if we minimized at every op. Now that function
1120 * keys have been cleared we may get a more minimal fsm. */
1121 switch ( minimizeLevel ) {
1122 case MinimizeApprox:
1123 graph->minimizeApproximate();
1124 break;
1125 case MinimizeStable:
1126 graph->minimizeStable();
1127 break;
1128 case MinimizePartition1:
1129 graph->minimizePartition1();
1130 break;
1131 case MinimizePartition2:
1132 graph->minimizePartition2();
1133 break;
1134 }
1135 }
1136
1137 graph->compressTransitions();
1138
1139 return graph;
1140 }
1141
1142 void ParseData::printNameTree()
1143 {
1144 /* Print the name instance map. */
1145 for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1146 printNameInst( *name, 0 );
1147
1148 cerr << "name index:" << endl;
1149 /* Show that the name index is correct. */
1150 for ( int ni = 0; ni < nextNameId; ni++ ) {
1151 cerr << ni << ": ";
1152 const char *name = nameIndex[ni]->name;
1153 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1154 }
1155 }
1156
1157 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1158 {
1159 /* Build the name tree and supporting data structures. */
1160 makeNameTree( gdNode );
1161
1162 /* Resove name references from gdNode. */
1163 initNameWalk();
1164 gdNode->value->resolveNameRefs( this );
1165
1166 /* Do not resolve action references. Since we are not building the entire
1167 * graph there's a good chance that many name references will fail. This
1168 * is okay since generating part of the graph is usually only done when
1169 * inspecting the compiled machine. */
1170
1171 /* Same story for extern entry point references. */
1172
1173 /* Flag this case so that the XML code generator is aware that we haven't
1174 * looked up name references in actions. It can then avoid segfaulting. */
1175 generatingSectionSubset = true;
1176
1177 /* Just building the specified graph. */
1178 initNameWalk();
1179 FsmAp *mainGraph = makeInstance( gdNode );
1180
1181 return mainGraph;
1182 }
1183
1184 FsmAp *ParseData::makeAll()
1185 {
1186 /* Build the name tree and supporting data structures. */
1187 makeNameTree( 0 );
1188
1189 /* Resove name references in the tree. */
1190 initNameWalk();
1191 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1192 glel->value->resolveNameRefs( this );
1193
1194 /* Resolve action code name references. */
1195 resolveActionNameRefs();
1196
1197 /* Force name references to the top level instantiations. */
1198 for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1199 (*inst)->numRefs += 1;
1200
1201 FsmAp *mainGraph = 0;
1202 FsmAp **graphs = new FsmAp*[instanceList.length()];
1203 int numOthers = 0;
1204
1205 /* Make all the instantiations, we know that main exists in this list. */
1206 initNameWalk();
1207 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
1208 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1209 /* Main graph is always instantiated. */
1210 mainGraph = makeInstance( glel );
1211 }
1212 else {
1213 /* Instantiate and store in others array. */
1214 graphs[numOthers++] = makeInstance( glel );
1215 }
1216 }
1217
1218 if ( mainGraph == 0 )
1219 mainGraph = graphs[--numOthers];
1220
1221 if ( numOthers > 0 ) {
1222 /* Add all the other graphs into main. */
1223 mainGraph->globOp( graphs, numOthers );
1224 }
1225
1226 delete[] graphs;
1227 return mainGraph;
1228 }
1229
1230 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1231 {
1232 /* FIXME: Actions used as conditions should be very constrained. */
1233 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1234 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1235 action->anyCall = true;
1236
1237 /* Need to recurse into longest match items. */
1238 if ( item->type == InlineItem::LmSwitch ) {
1239 LongestMatch *lm = item->longestMatch;
1240 for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1241 if ( lmi->action != 0 )
1242 analyzeAction( action, lmi->action->inlineList );
1243 }
1244 }
1245
1246 if ( item->type == InlineItem::LmOnLast ||
1247 item->type == InlineItem::LmOnNext ||
1248 item->type == InlineItem::LmOnLagBehind )
1249 {
1250 LongestMatchPart *lmi = item->longestMatchPart;
1251 if ( lmi->action != 0 )
1252 analyzeAction( action, lmi->action->inlineList );
1253 }
1254
1255 if ( item->children != 0 )
1256 analyzeAction( action, item->children );
1257 }
1258 }
1259
1260
1261 /* Check actions for bad uses of fsm directives. We don't go inside longest
1262 * match items in actions created by ragel, since we just want the user
1263 * actions. */
1264 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1265 {
1266 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1267 /* EOF checks. */
1268 if ( act->numEofRefs > 0 ) {
1269 switch ( item->type ) {
1270 /* Currently no checks. */
1271 default:
1272 break;
1273 }
1274 }
1275
1276 /* Recurse. */
1277 if ( item->children != 0 )
1278 checkInlineList( act, item->children );
1279 }
1280 }
1281
1282 void ParseData::checkAction( Action *action )
1283 {
1284 /* Check for actions with calls that are embedded within a longest match
1285 * machine. */
1286 if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1287 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1288 NameInst *check = *ar;
1289 while ( check != 0 ) {
1290 if ( check->isLongestMatch ) {
1291 error(action->loc) << "within a scanner, fcall is permitted"
1292 " only in pattern actions" << endl;
1293 break;
1294 }
1295 check = check->parent;
1296 }
1297 }
1298 }
1299
1300 checkInlineList( action, action->inlineList );
1301 }
1302
1303
1304 void ParseData::analyzeGraph( FsmAp *graph )
1305 {
1306 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1307 analyzeAction( act, act->inlineList );
1308
1309 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1310 /* The transition list. */
1311 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1312 for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1313 at->value->numTransRefs += 1;
1314 }
1315
1316 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1317 at->value->numToStateRefs += 1;
1318
1319 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1320 at->value->numFromStateRefs += 1;
1321
1322 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1323 at->value->numEofRefs += 1;
1324
1325 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1326 for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1327 (*sci)->numCondRefs += 1;
1328 }
1329 }
1330
1331 /* Checks for bad usage of directives in action code. */
1332 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1333 checkAction( act );
1334 }
1335
1336 void ParseData::makeExportsNameTree()
1337 {
1338 /* Make a name tree for the exports. */
1339 initExportsNameWalk();
1340
1341 /* First make the name tree. */
1342 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1343 if ( gdel->value->isExport ) {
1344 /* Recurse on the instance. */
1345 gdel->value->makeNameTree( gdel->loc, this );
1346 }
1347 }
1348 }
1349
1350 void ParseData::makeExports()
1351 {
1352 makeExportsNameTree();
1353
1354 /* Resove name references in the tree. */
1355 initExportsNameWalk();
1356 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1357 if ( gdel->value->isExport )
1358 gdel->value->resolveNameRefs( this );
1359 }
1360
1361 /* Make all the instantiations, we know that main exists in this list. */
1362 initExportsNameWalk();
1363 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1364 /* Check if this var def is an export. */
1365 if ( gdel->value->isExport ) {
1366 /* Build the graph from a walk of the parse tree. */
1367 FsmAp *graph = gdel->value->walk( this );
1368
1369 /* Build the graph from a walk of the parse tree. */
1370 if ( !graph->checkSingleCharMachine() ) {
1371 error(gdel->loc) << "bad export machine, must define "
1372 "a single character" << endl;
1373 }
1374 else {
1375 /* Safe to extract the key and declare the export. */
1376 Key exportKey = graph->startState->outList.head->lowKey;
1377 exportList.append( new Export( gdel->value->name, exportKey ) );
1378 }
1379 }
1380 }
1381
1382 }
1383
1384 /* Construct the machine and catch failures which can occur during
1385 * construction. */
1386 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1387 {
1388 try {
1389 /* This machine construction can fail. */
1390 prepareMachineGenTBWrapped( graphDictEl );
1391 }
1392 catch ( FsmConstructFail fail ) {
1393 switch ( fail.reason ) {
1394 case FsmConstructFail::CondNoKeySpace: {
1395 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1396 error(loc) << "sorry, no more characters are "
1397 "available in the alphabet space" << endl;
1398 error(loc) << " for conditions, please use a "
1399 "smaller alphtype or reduce" << endl;
1400 error(loc) << " the span of characters on which "
1401 "conditions are embedded" << endl;
1402 break;
1403 }
1404 }
1405 }
1406 }
1407
1408 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1409 {
1410 beginProcessing();
1411 initKeyOps();
1412 makeRootNames();
1413 initLongestMatchData();
1414
1415 /* Make the graph, do minimization. */
1416 if ( graphDictEl == 0 )
1417 sectionGraph = makeAll();
1418 else
1419 sectionGraph = makeSpecific( graphDictEl );
1420
1421 /* Compute exports from the export definitions. */
1422 makeExports();
1423
1424 /* If any errors have occured in the input file then don't write anything. */
1425 if ( gblErrorCount > 0 )
1426 return;
1427
1428 analyzeGraph( sectionGraph );
1429
1430 /* Depends on the graph analysis. */
1431 setLongestMatchData( sectionGraph );
1432
1433 /* Decide if an error state is necessary.
1434 * 1. There is an error transition
1435 * 2. There is a gap in the transitions
1436 * 3. The longest match operator requires it. */
1437 if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1438 sectionGraph->errState = sectionGraph->addState();
1439
1440 /* State numbers need to be assigned such that all final states have a
1441 * larger state id number than all non-final states. This enables the
1442 * first_final mechanism to function correctly. We also want states to be
1443 * ordered in a predictable fashion. So we first apply a depth-first
1444 * search, then do a stable sort by final state status, then assign
1445 * numbers. */
1446
1447 sectionGraph->depthFirstOrdering();
1448 sectionGraph->sortStatesByFinal();
1449 sectionGraph->setStateNumbers( 0 );
1450 }
1451
1452 void ParseData::generateReduced( InputData &inputData )
1453 {
1454 beginProcessing();
1455
1456 cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
1457
1458 /* Make the generator. */
1459 BackendGen backendGen( sectionName, this, sectionGraph, cgd );
1460
1461 /* Write out with it. */
1462 backendGen.makeBackend();
1463
1464 if ( printStatistics ) {
1465 cerr << "fsm name : " << sectionName << endl;
1466 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1467 cerr << endl;
1468 }
1469 }
1470
1471 void ParseData::generateXML( ostream &out )
1472 {
1473 beginProcessing();
1474
1475 /* Make the generator. */
1476 XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1477
1478 /* Write out with it. */
1479 codeGen.writeXML();
1480
1481 if ( printStatistics ) {
1482 cerr << "fsm name : " << sectionName << endl;
1483 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1484 cerr << endl;
1485 }
1486 }
1487