"Fossies" - the Fresh Open Source Software Archive 
Member "ragel-6.10/ragel/parsetree.h" (24 Mar 2017, 17767 Bytes) of package /linux/misc/ragel-6.10.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "parsetree.h" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
6.9_vs_6.10.
1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 */
4
5 /* This file is part of Ragel.
6 *
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22 #ifndef _PARSETREE_H
23 #define _PARSETREE_H
24
25 #include "ragel.h"
26 #include "avlmap.h"
27 #include "bstmap.h"
28 #include "vector.h"
29 #include "dlist.h"
30
31 struct NameInst;
32
33 /* Types of builtin machines. */
34 enum BuiltinMachine
35 {
36 BT_Any,
37 BT_Ascii,
38 BT_Extend,
39 BT_Alpha,
40 BT_Digit,
41 BT_Alnum,
42 BT_Lower,
43 BT_Upper,
44 BT_Cntrl,
45 BT_Graph,
46 BT_Print,
47 BT_Punct,
48 BT_Space,
49 BT_Xdigit,
50 BT_Lambda,
51 BT_Empty
52 };
53
54
55 struct ParseData;
56
57 /* Leaf type. */
58 struct Literal;
59
60 /* Tree nodes. */
61
62 struct Term;
63 struct FactorWithAug;
64 struct FactorWithRep;
65 struct FactorWithNeg;
66 struct Factor;
67 struct Expression;
68 struct Join;
69 struct MachineDef;
70 struct LongestMatch;
71 struct LongestMatchPart;
72 struct LmPartList;
73 struct Range;
74 struct LengthDef;
75
76 /* Type of augmentation. Describes locations in the machine. */
77 enum AugType
78 {
79 /* Transition actions/priorities. */
80 at_start,
81 at_all,
82 at_finish,
83 at_leave,
84
85 /* Global error actions. */
86 at_start_gbl_error,
87 at_all_gbl_error,
88 at_final_gbl_error,
89 at_not_start_gbl_error,
90 at_not_final_gbl_error,
91 at_middle_gbl_error,
92
93 /* Local error actions. */
94 at_start_local_error,
95 at_all_local_error,
96 at_final_local_error,
97 at_not_start_local_error,
98 at_not_final_local_error,
99 at_middle_local_error,
100
101 /* To State Action embedding. */
102 at_start_to_state,
103 at_all_to_state,
104 at_final_to_state,
105 at_not_start_to_state,
106 at_not_final_to_state,
107 at_middle_to_state,
108
109 /* From State Action embedding. */
110 at_start_from_state,
111 at_all_from_state,
112 at_final_from_state,
113 at_not_start_from_state,
114 at_not_final_from_state,
115 at_middle_from_state,
116
117 /* EOF Action embedding. */
118 at_start_eof,
119 at_all_eof,
120 at_final_eof,
121 at_not_start_eof,
122 at_not_final_eof,
123 at_middle_eof
124 };
125
126 /* IMPORTANT: These must follow the same order as the state augs in AugType
127 * since we will be using this to compose AugType. */
128 enum StateAugType
129 {
130 sat_start = 0,
131 sat_all,
132 sat_final,
133 sat_not_start,
134 sat_not_final,
135 sat_middle
136 };
137
138 struct Action;
139 struct PriorDesc;
140 struct RegExpr;
141 struct ReItem;
142 struct ReOrBlock;
143 struct ReOrItem;
144 struct ExplicitMachine;
145 struct InlineItem;
146 struct InlineList;
147
148 /* Reference to a named state. */
149 typedef Vector<char*> NameRef;
150 typedef Vector<NameRef*> NameRefList;
151 typedef Vector<NameInst*> NameTargList;
152
153 /* Structure for storing location of epsilon transitons. */
154 struct EpsilonLink
155 {
156 EpsilonLink( const InputLoc &loc, NameRef &target )
157 : loc(loc), target(target) { }
158
159 InputLoc loc;
160 NameRef target;
161 };
162
163 struct Label
164 {
165 Label( const InputLoc &loc, char *data )
166 : loc(loc), data(data) { }
167
168 InputLoc loc;
169 char *data;
170 };
171
172 /* Structrue represents an action assigned to some FactorWithAug node. The
173 * factor with aug will keep an array of these. */
174 struct ParserAction
175 {
176 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
177 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
178
179 InputLoc loc;
180 AugType type;
181 int localErrKey;
182 Action *action;
183 };
184
185 struct ConditionTest
186 {
187 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
188 loc(loc), type(type), action(action), sense(sense) { }
189
190 InputLoc loc;
191 AugType type;
192 Action *action;
193 bool sense;
194 };
195
196 struct Token
197 {
198 char *data;
199 int length;
200 InputLoc loc;
201
202 void append( const Token &other );
203 void set( const char *str, int len );
204 };
205
206 char *prepareLitString( const InputLoc &loc, const char *src, long length,
207 long &resLen, bool &caseInsensitive );
208
209 /* Store the value and type of a priority augmentation. */
210 struct PriorityAug
211 {
212 PriorityAug( AugType type, int priorKey, int priorValue ) :
213 type(type), priorKey(priorKey), priorValue(priorValue) { }
214
215 AugType type;
216 int priorKey;
217 int priorValue;
218 };
219
220 /*
221 * A Variable Definition
222 */
223 struct VarDef
224 {
225 VarDef( const char *name, MachineDef *machineDef )
226 : name(name), machineDef(machineDef), isExport(false) { }
227
228 /* Parse tree traversal. */
229 FsmAp *walk( ParseData *pd );
230 void makeNameTree( const InputLoc &loc, ParseData *pd );
231 void resolveNameRefs( ParseData *pd );
232
233 const char *name;
234 MachineDef *machineDef;
235 bool isExport;
236 };
237
238
239 /*
240 * LongestMatch
241 *
242 * Wherever possible the item match will execute on the character. If not
243 * possible the item match will execute on a lookahead character and either
244 * hold the current char (if one away) or backup.
245 *
246 * How to handle the problem of backing up over a buffer break?
247 *
248 * Don't want to use pending out transitions for embedding item match because
249 * the role of item match action is different: it may sometimes match on the
250 * final transition, or may match on a lookahead character.
251 *
252 * Don't want to invent a new operator just for this. So just trail action
253 * after machine, this means we can only use literal actions.
254 *
255 * The item action may
256 *
257 * What states of the machine will be final. The item actions that wrap around
258 * on the last character will go straight to the start state.
259 *
260 * Some transitions will be lookahead transitions, they will hold the current
261 * character. Crossing them with regular transitions must be restricted
262 * because it does not make sense. The transition cannot simultaneously hold
263 * and consume the current character.
264 */
265 struct LongestMatchPart
266 {
267 LongestMatchPart( Join *join, Action *action,
268 InputLoc &semiLoc, int longestMatchId )
269 :
270 join(join), action(action), semiLoc(semiLoc),
271 longestMatchId(longestMatchId), inLmSelect(false) { }
272
273 InputLoc getLoc();
274
275 Join *join;
276 Action *action;
277 InputLoc semiLoc;
278
279 Action *setActId;
280 Action *actOnLast;
281 Action *actOnNext;
282 Action *actLagBehind;
283 int longestMatchId;
284 bool inLmSelect;
285 LongestMatch *longestMatch;
286
287 LongestMatchPart *prev, *next;
288 };
289
290 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
291 struct LmPartList : DList<LongestMatchPart> {};
292
293 struct LongestMatch
294 {
295 /* Construct with a list of joins */
296 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
297 loc(loc), longestMatchList(longestMatchList), name(0),
298 lmSwitchHandlesError(false) { }
299
300 /* Tree traversal. */
301 FsmAp *walk( ParseData *pd );
302 void makeNameTree( ParseData *pd );
303 void resolveNameRefs( ParseData *pd );
304 void transferScannerLeavingActions( FsmAp *graph );
305 void runLongestMatch( ParseData *pd, FsmAp *graph );
306 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
307 InlineList *inlineList );
308 void makeActions( ParseData *pd );
309 void findName( ParseData *pd );
310 void restart( FsmAp *graph, TransAp *trans );
311
312 InputLoc loc;
313 LmPartList *longestMatchList;
314 const char *name;
315
316 Action *lmActSelect;
317 bool lmSwitchHandlesError;
318
319 LongestMatch *next, *prev;
320 };
321
322
323 /* List of Expressions. */
324 typedef DList<Expression> ExprList;
325
326 struct MachineDef
327 {
328 enum Type {
329 JoinType,
330 LongestMatchType,
331 LengthDefType
332 };
333
334 MachineDef( Join *join )
335 : join(join), longestMatch(0), lengthDef(0), type(JoinType) {}
336 MachineDef( LongestMatch *longestMatch )
337 : join(0), longestMatch(longestMatch), lengthDef(0), type(LongestMatchType) {}
338 MachineDef( LengthDef *lengthDef )
339 : join(0), longestMatch(0), lengthDef(lengthDef), type(LengthDefType) {}
340
341 FsmAp *walk( ParseData *pd );
342 void makeNameTree( ParseData *pd );
343 void resolveNameRefs( ParseData *pd );
344
345 Join *join;
346 LongestMatch *longestMatch;
347 LengthDef *lengthDef;
348 Type type;
349 };
350
351 /*
352 * Join
353 */
354 struct Join
355 {
356 /* Construct with the first expression. */
357 Join( Expression *expr );
358 Join( const InputLoc &loc, Expression *expr );
359
360 /* Tree traversal. */
361 FsmAp *walk( ParseData *pd );
362 FsmAp *walkJoin( ParseData *pd );
363 void makeNameTree( ParseData *pd );
364 void resolveNameRefs( ParseData *pd );
365
366 /* Data. */
367 InputLoc loc;
368 ExprList exprList;
369 };
370
371 /*
372 * Expression
373 */
374 struct Expression
375 {
376 enum Type {
377 OrType,
378 IntersectType,
379 SubtractType,
380 StrongSubtractType,
381 TermType,
382 BuiltinType
383 };
384
385 /* Construct with an expression on the left and a term on the right. */
386 Expression( Expression *expression, Term *term, Type type ) :
387 expression(expression), term(term),
388 type(type), prev(this), next(this) { }
389
390 /* Construct with only a term. */
391 Expression( Term *term ) :
392 expression(0), term(term),
393 type(TermType) , prev(this), next(this) { }
394
395 /* Construct with a builtin type. */
396 Expression( BuiltinMachine builtin ) :
397 expression(0), term(0), builtin(builtin),
398 type(BuiltinType), prev(this), next(this) { }
399
400 ~Expression();
401
402 /* Tree traversal. */
403 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
404 void makeNameTree( ParseData *pd );
405 void resolveNameRefs( ParseData *pd );
406
407 /* Node data. */
408 Expression *expression;
409 Term *term;
410 BuiltinMachine builtin;
411 Type type;
412
413 Expression *prev, *next;
414 };
415
416 /*
417 * Term
418 */
419 struct Term
420 {
421 enum Type {
422 ConcatType,
423 RightStartType,
424 RightFinishType,
425 LeftType,
426 FactorWithAugType
427 };
428
429 Term( Term *term, FactorWithAug *factorWithAug ) :
430 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
431
432 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
433 term(term), factorWithAug(factorWithAug), type(type) { }
434
435 Term( FactorWithAug *factorWithAug ) :
436 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
437
438 ~Term();
439
440 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
441 void makeNameTree( ParseData *pd );
442 void resolveNameRefs( ParseData *pd );
443
444 Term *term;
445 FactorWithAug *factorWithAug;
446 Type type;
447
448 /* Priority descriptor for RightFinish type. */
449 PriorDesc priorDescs[2];
450 };
451
452
453 /* Third level of precedence. Augmenting nodes with actions and priorities. */
454 struct FactorWithAug
455 {
456 FactorWithAug( FactorWithRep *factorWithRep ) :
457 priorDescs(0), factorWithRep(factorWithRep) { }
458 ~FactorWithAug();
459
460 /* Tree traversal. */
461 FsmAp *walk( ParseData *pd );
462 void makeNameTree( ParseData *pd );
463 void resolveNameRefs( ParseData *pd );
464
465 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
466 void assignPriorities( FsmAp *graph, int *priorOrd );
467
468 void assignConditions( FsmAp *graph );
469
470 /* Actions and priorities assigned to the factor node. */
471 Vector<ParserAction> actions;
472 Vector<PriorityAug> priorityAugs;
473 PriorDesc *priorDescs;
474 Vector<Label> labels;
475 Vector<EpsilonLink> epsilonLinks;
476 Vector<ConditionTest> conditions;
477
478 FactorWithRep *factorWithRep;
479 };
480
481 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
482 * optional and plus. */
483 struct FactorWithRep
484 {
485 enum Type {
486 StarType,
487 StarStarType,
488 OptionalType,
489 PlusType,
490 ExactType,
491 MaxType,
492 MinType,
493 RangeType,
494 FactorWithNegType
495 };
496
497 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
498 int lowerRep, int upperRep, Type type ) :
499 loc(loc), factorWithRep(factorWithRep),
500 factorWithNeg(0), lowerRep(lowerRep),
501 upperRep(upperRep), type(type) { }
502
503 FactorWithRep( FactorWithNeg *factorWithNeg )
504 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
505
506 ~FactorWithRep();
507
508 /* Tree traversal. */
509 FsmAp *walk( ParseData *pd );
510 void makeNameTree( ParseData *pd );
511 void resolveNameRefs( ParseData *pd );
512
513 InputLoc loc;
514 FactorWithRep *factorWithRep;
515 FactorWithNeg *factorWithNeg;
516 int lowerRep, upperRep;
517 Type type;
518
519 /* Priority descriptor for StarStar type. */
520 PriorDesc priorDescs[2];
521 };
522
523 /* Fifth level of precedence. Provides Negation. */
524 struct FactorWithNeg
525 {
526 enum Type {
527 NegateType,
528 CharNegateType,
529 FactorType
530 };
531
532 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
533 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
534
535 FactorWithNeg( Factor *factor ) :
536 factorWithNeg(0), factor(factor), type(FactorType) { }
537
538 ~FactorWithNeg();
539
540 /* Tree traversal. */
541 FsmAp *walk( ParseData *pd );
542 void makeNameTree( ParseData *pd );
543 void resolveNameRefs( ParseData *pd );
544
545 InputLoc loc;
546 FactorWithNeg *factorWithNeg;
547 Factor *factor;
548 Type type;
549 };
550
551 /*
552 * Factor
553 */
554 struct Factor
555 {
556 /* Language elements a factor node can be. */
557 enum Type {
558 LiteralType,
559 RangeType,
560 OrExprType,
561 RegExprType,
562 ReferenceType,
563 ParenType,
564 LongestMatchType,
565 };
566
567 /* Construct with a literal fsm. */
568 Factor( Literal *literal ) :
569 literal(literal), type(LiteralType) { }
570
571 /* Construct with a range. */
572 Factor( Range *range ) :
573 range(range), type(RangeType) { }
574
575 /* Construct with the or part of a regular expression. */
576 Factor( ReItem *reItem ) :
577 reItem(reItem), type(OrExprType) { }
578
579 /* Construct with a regular expression. */
580 Factor( RegExpr *regExpr ) :
581 regExpr(regExpr), type(RegExprType) { }
582
583 /* Construct with a reference to a var def. */
584 Factor( const InputLoc &loc, VarDef *varDef ) :
585 loc(loc), varDef(varDef), type(ReferenceType) {}
586
587 /* Construct with a parenthesized join. */
588 Factor( Join *join ) :
589 join(join), type(ParenType) {}
590
591 /* Construct with a longest match operator. */
592 Factor( LongestMatch *longestMatch ) :
593 longestMatch(longestMatch), type(LongestMatchType) {}
594
595 /* Cleanup. */
596 ~Factor();
597
598 /* Tree traversal. */
599 FsmAp *walk( ParseData *pd );
600 void makeNameTree( ParseData *pd );
601 void resolveNameRefs( ParseData *pd );
602
603 InputLoc loc;
604 Literal *literal;
605 Range *range;
606 ReItem *reItem;
607 RegExpr *regExpr;
608 VarDef *varDef;
609 Join *join;
610 LongestMatch *longestMatch;
611 int lower, upper;
612 Type type;
613 };
614
615 /* A range machine. Only ever composed of two literals. */
616 struct Range
617 {
618 Range( Literal *lowerLit, Literal *upperLit )
619 : lowerLit(lowerLit), upperLit(upperLit) { }
620
621 ~Range();
622 FsmAp *walk( ParseData *pd );
623
624 Literal *lowerLit;
625 Literal *upperLit;
626 };
627
628 /* Some literal machine. Can be a number or literal string. */
629 struct Literal
630 {
631 enum LiteralType { Number, LitString };
632
633 Literal( const Token &token, LiteralType type )
634 : token(token), type(type) { }
635
636 FsmAp *walk( ParseData *pd );
637
638 Token token;
639 LiteralType type;
640 };
641
642 /* Regular expression. */
643 struct RegExpr
644 {
645 enum RegExpType { RecurseItem, Empty };
646
647 /* Constructors. */
648 RegExpr() :
649 type(Empty), caseInsensitive(false) { }
650 RegExpr(RegExpr *regExpr, ReItem *item) :
651 regExpr(regExpr), item(item),
652 type(RecurseItem), caseInsensitive(false) { }
653
654 ~RegExpr();
655 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
656
657 RegExpr *regExpr;
658 ReItem *item;
659 RegExpType type;
660 bool caseInsensitive;
661 };
662
663 /* An item in a regular expression. */
664 struct ReItem
665 {
666 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
667
668 ReItem( const InputLoc &loc, const Token &token )
669 : loc(loc), token(token), star(false), type(Data) { }
670 ReItem( const InputLoc &loc, ReItemType type )
671 : loc(loc), star(false), type(type) { }
672 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
673 : loc(loc), orBlock(orBlock), star(false), type(type) { }
674
675 ~ReItem();
676 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
677
678 InputLoc loc;
679 Token token;
680 ReOrBlock *orBlock;
681 bool star;
682 ReItemType type;
683 };
684
685 /* An or block item. */
686 struct ReOrBlock
687 {
688 enum ReOrBlockType { RecurseItem, Empty };
689
690 /* Constructors. */
691 ReOrBlock()
692 : type(Empty) { }
693 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
694 : orBlock(orBlock), item(item), type(RecurseItem) { }
695
696 ~ReOrBlock();
697 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
698
699 ReOrBlock *orBlock;
700 ReOrItem *item;
701 ReOrBlockType type;
702 };
703
704 /* An item in an or block. */
705 struct ReOrItem
706 {
707 enum ReOrItemType { Data, Range };
708
709 ReOrItem( const InputLoc &loc, const Token &token )
710 : loc(loc), token(token), type(Data) {}
711 ReOrItem( const InputLoc &loc, char lower, char upper )
712 : loc(loc), lower(lower), upper(upper), type(Range) { }
713
714 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
715
716 InputLoc loc;
717 Token token;
718 char lower;
719 char upper;
720 ReOrItemType type;
721 };
722
723
724 /*
725 * Inline code tree
726 */
727 struct InlineList;
728 struct InlineItem
729 {
730 enum Type
731 {
732 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
733 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
734 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
735 LmInitTokStart, LmSetTokStart, Break
736 };
737
738 InlineItem( const InputLoc &loc, char *data, Type type ) :
739 loc(loc), data(data), nameRef(0), children(0), type(type) { }
740
741 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
742 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
743
744 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
745 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
746 nameRef(0), children(0), longestMatch(longestMatch),
747 longestMatchPart(longestMatchPart), type(type) { }
748
749 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
750 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
751 type(type) { }
752
753 InlineItem( const InputLoc &loc, Type type ) :
754 loc(loc), data(0), nameRef(0), children(0), type(type) { }
755
756 InputLoc loc;
757 char *data;
758 NameRef *nameRef;
759 NameInst *nameTarg;
760 InlineList *children;
761 LongestMatch *longestMatch;
762 LongestMatchPart *longestMatchPart;
763 Type type;
764
765 InlineItem *prev, *next;
766 };
767
768 /* Normally this would be atypedef, but that would entail including DList from
769 * ptreetypes, which should be just typedef forwards. */
770 struct InlineList : public DList<InlineItem> { };
771
772 #endif