"Fossies" - the Fresh Open Source Software Archive

Member "ragel-7.0.4/src/inputdata.cc" (15 Feb 2021, 32909 Bytes) of package /linux/misc/ragel-7.0.4.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "inputdata.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 7.0.3_vs_7.0.4.

    1 /*
    2  * Copyright 2008-2018 Adrian Thurston <thurston@colm.net>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a copy
    5  * of this software and associated documentation files (the "Software"), to
    6  * deal in the Software without restriction, including without limitation the
    7  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    8  * sell copies of the Software, and to permit persons to whom the Software is
    9  * furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice shall be included in all
   12  * copies or substantial portions of the Software.
   13  *
   14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   20  * SOFTWARE.
   21  */
   22 
   23 #include <libfsm/ragel.h>
   24 #include <libfsm/common.h>
   25 #include "inputdata.h"
   26 #include "parsedata.h"
   27 #include "load.h"
   28 #include "rlscan.h"
   29 #include "reducer.h"
   30 #include "version.h"
   31 #include "pcheck.h"
   32 #include <libfsm/dot.h>
   33 
   34 #include <colm/colm.h>
   35 
   36 #include <stdlib.h>
   37 #include <string.h>
   38 #include <stdio.h>
   39 #include <iostream>
   40 #include <iomanip>
   41 #include <fstream>
   42 #include <unistd.h>
   43 #include <sstream>
   44 #include <sys/types.h>
   45 #include <sys/stat.h>
   46 #include <fcntl.h>
   47 #include <errno.h>
   48 #if defined(HAVE_SYS_WAIT_H)
   49 #include <sys/wait.h>
   50 #endif
   51 
   52 #ifdef _WIN32
   53 #include <windows.h>
   54 #include <psapi.h>
   55 #include <time.h>
   56 #include <io.h>
   57 #include <process.h>
   58 
   59 #if _MSC_VER
   60 #define S_IRUSR _S_IREAD
   61 #define S_IWUSR _S_IWRITE
   62 #endif
   63 #endif
   64 
   65 using std::istream;
   66 using std::ifstream;
   67 using std::ofstream;
   68 using std::stringstream;
   69 using std::ostream;
   70 using std::endl;
   71 using std::ios;
   72 
   73 InputData::~InputData()
   74 {
   75     inputItems.empty();
   76     parseDataList.empty();
   77     sectionList.empty();
   78 
   79     for ( Vector<const char**>::Iter fns = streamFileNames; fns.lte(); fns++ ) {
   80         const char **ptr = *fns;
   81         while ( *ptr != 0 ) {
   82             ::free( (void*)*ptr );
   83             ptr += 1;
   84         }
   85         free( (void*) *fns );
   86     }
   87 
   88     if ( outputFileName != 0 )
   89         delete[] outputFileName;
   90 
   91     if ( histogramFn != 0 )
   92         ::free( (void*)histogramFn );
   93 
   94     if ( histogram != 0 )
   95         delete[] histogram;
   96 
   97     for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ )
   98         free( (void*) *bl );
   99 }
  100 
  101 void InputData::makeDefaultFileName()
  102 {
  103     if ( outputFileName == 0 )
  104         outputFileName = (hostLang->defaultOutFn)( inputFileName );
  105 }
  106 
  107 bool InputData::isBreadthLabel( const string &label )
  108 {
  109     for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) {
  110         if ( label == *bl )
  111             return true;
  112     }
  113     return false;
  114 }
  115 
  116 void InputData::createOutputStream()
  117 {
  118     /* Make sure we are not writing to the same file as the input file. */
  119     if ( outputFileName != 0 ) {
  120         if ( strcmp( inputFileName, outputFileName  ) == 0 ) {
  121             error() << "output file \"" << outputFileName  << 
  122                     "\" is the same as the input file" << endl;
  123         }
  124 
  125         /* Create the filter on the output and open it. */
  126         outFilter = new output_filter( outputFileName );
  127 
  128         /* Open the output stream, attaching it to the filter. */
  129         outStream = new ostream( outFilter );
  130     }
  131     else {
  132         /* Writing out to std out. */
  133         outStream = &std::cout;
  134     }
  135 }
  136 
  137 void InputData::openOutput()
  138 {
  139     if ( outFilter != 0 ) {
  140         outFilter->open( outputFileName, ios::out|ios::trunc );
  141         if ( !outFilter->is_open() ) {
  142             error() << "error opening " << outputFileName << " for writing" << endl;
  143             abortCompile( 1 );
  144         }
  145     }
  146 }
  147 
  148 void InputData::prepareSingleMachine()
  149 {
  150     ParseData *pd = 0;
  151     GraphDictEl *gdEl = 0;
  152 
  153     /* Locate a machine spec to generate dot output for. We can only emit.
  154      * Dot takes one graph at a time. */
  155     if ( machineSpec != 0 ) {
  156         /* Machine specified. */
  157         ParseDataDictEl *pdEl = parseDataDict.find( machineSpec );
  158         if ( pdEl == 0 )
  159             error() << "could not locate machine specified with -S and/or -M" << endp;
  160         pd = pdEl->value;
  161     }
  162     else { 
  163         /* No machine spec given, generate the first one. */
  164         if ( parseDataList.length() == 0 )
  165             error() << "no machine specification to generate graphviz output" << endp;
  166 
  167         pd = parseDataList.head;
  168     }
  169 
  170     if ( machineName != 0 ) {
  171         gdEl = pd->graphDict.find( machineName );
  172         if ( gdEl == 0 )
  173             error() << "machine definition/instantiation not found" << endp;
  174     }
  175     else {
  176         /* We are using the whole machine spec. Need to make sure there
  177          * are instances in the spec. */
  178         if ( pd->instanceList.length() == 0 )
  179             error() << "no machine instantiations to generate graphviz output" << endp;
  180     }
  181 
  182     pd->prepareMachineGen( gdEl, hostLang );
  183     dotGenPd = pd;
  184 }
  185 
  186 void InputData::prepareAllMachines()
  187 {
  188     for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) {
  189         ParseData *pd = pdel->value;
  190         if ( pd->instanceList.length() > 0 ) {
  191             pd->prepareMachineGen( 0, hostLang );
  192 
  193             pd->makeExports();
  194         }
  195 
  196     }
  197 }
  198 
  199 void InputData::generateReduced()
  200 {
  201     for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) {
  202         ParseData *pd = pdel->value;
  203         if ( pd->instanceList.length() > 0 )
  204             pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang );
  205     }
  206 }
  207 
  208 void InputData::verifyWriteHasData( InputItem *ii )
  209 {
  210     if ( ii->type == InputItem::Write ) {
  211         if ( ii->pd->cgd == 0 )
  212             error( ii->loc ) << ii->pd->sectionName << ": no machine instantiations to write" << endl;
  213     }
  214 }
  215 
  216 void InputData::verifyWritesHaveData()
  217 {
  218     for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ )
  219         verifyWriteHasData( ii );
  220 }
  221 
  222 void InputData::writeOutput( InputItem *ii )
  223 {
  224     /* If it is the first input item then check if we need to write the BOM. */
  225     if ( ii->prev == 0 && utf8BomPresent )
  226         *outStream << (uchar)0xEF << (uchar)0xBB << (uchar) 0xBF;
  227 
  228     switch ( ii->type ) {
  229         case InputItem::Write: {
  230             CodeGenData *cgd = ii->pd->cgd;
  231             cgd->writeStatement( ii->loc, ii->writeArgs.size(),
  232                     ii->writeArgs, generateDot, hostLang );
  233             break;
  234         }
  235         case InputItem::HostData: {
  236             switch ( hostLang->backend ) {
  237                 case Direct:
  238                     if ( ii->loc.fileName != 0 ) {
  239                         if ( ii->prev != 0 )
  240                             *outStream << "\n";
  241                         (*hostLang->genLineDirective)( *outStream, !noLineDirectives, ii->loc.line, ii->loc.fileName );
  242                     }
  243                         
  244                     *outStream << ii->data.str();
  245                     break;
  246                 case Translated:
  247                     openHostBlock( '@', this, *outStream, inputFileName, ii->loc.line );
  248                     translatedHostData( *outStream, ii->data.str() );
  249                     *outStream << "}@";
  250                     break;
  251             }
  252             break;
  253         }
  254         case InputItem::EndSection: {
  255             break;
  256         }
  257     }
  258 }
  259 
  260 void InputData::closeOutput()
  261 {
  262     /* If writing to a file, delete the ostream, causing it to flush.
  263      * Standard out is flushed automatically. */
  264     if ( outputFileName != 0 ) {
  265         delete outStream;
  266         delete outFilter;
  267     }
  268 }
  269 
  270 void InputData::writeDot( ostream &out )
  271 {
  272     ParseData *pd = dotGenPd;
  273     GraphvizDotGen dotGen( this, pd->fsmCtx, pd->sectionGraph, pd->sectionName, pd->machineId, out );
  274     dotGen.write();
  275 }
  276 
  277 void InputData::processDot()
  278 {
  279     /* Compiles the DOT machines. */
  280     prepareSingleMachine();
  281 
  282     if ( errorCount > 0 )
  283         abortCompile( 1 );
  284 
  285     createOutputStream();
  286 
  287     if ( errorCount > 0 )
  288         abortCompile( 1 );
  289 
  290     /*
  291      * From this point on we should not be reporting any errors.
  292      */
  293 
  294     openOutput();
  295     writeDot( *outStream );
  296     closeOutput();
  297 }
  298 
  299 bool InputData::checkLastRef( InputItem *ii )
  300 {
  301     if ( generateDot )
  302         return true;
  303     
  304     if ( errorCount > 0 )
  305         return false;
  306         
  307     /*
  308      * 1. Go forward to next last reference.
  309      * 2. Fully process that machine, mark as processed.
  310      * 3. Move forward through input items until no longer 
  311      */
  312     if ( ii->section != 0 && ii->section->lastReference == ii ) {
  313         /* Fully Process. */
  314         ParseData *pd = ii->pd;
  315 
  316         if ( pd->instanceList.length() > 0 ) {
  317 #ifdef WITH_RAGEL_KELBT
  318             if ( ii->parser != 0 ) 
  319                 ii->parser->terminateParser();
  320 #endif
  321 
  322             FsmRes res = pd->prepareMachineGen( 0, hostLang );
  323 
  324             /* Compute exports from the export definitions. */
  325             pd->makeExports();
  326 
  327             if ( !res.success() )
  328                 return false;
  329 
  330             if ( errorCount > 0 )
  331                 return false;
  332 
  333             pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang );
  334 
  335             if ( errorCount > 0 )
  336                 return false;
  337         }
  338 
  339         /* Mark all input items referencing the machine as processed. */
  340         InputItem *toMark = lastFlush;
  341         while ( true ) {
  342             toMark->processed = true;
  343 
  344             if ( toMark == ii )
  345                 break;
  346 
  347             toMark = toMark->next;
  348         }
  349 
  350         /* Move forward, flushing input items until we get to an unprocessed
  351          * input item. */
  352         while ( lastFlush != 0 && lastFlush->processed ) {
  353             verifyWriteHasData( lastFlush );
  354 
  355             if ( errorCount > 0 )
  356                 return false;
  357 
  358             /* Flush out. */
  359             writeOutput( lastFlush );
  360 
  361             lastFlush = lastFlush->next;
  362         }
  363     }
  364     return true;
  365 }
  366 
  367 void InputData::makeFirstInputItem()
  368 {
  369     /* Make the first input item. */
  370     InputItem *firstInputItem = new InputItem;
  371     firstInputItem->type = InputItem::HostData;
  372     firstInputItem->loc.fileName = inputFileName;
  373     firstInputItem->loc.line = 1;
  374     firstInputItem->loc.col = 1;
  375     inputItems.append( firstInputItem );
  376 }
  377 
  378 /* Send eof to all parsers. */
  379 void InputData::terminateAllParsers( )
  380 {
  381 #ifdef WITH_RAGEL_KELBT
  382     for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
  383         pdel->value->terminateParser();
  384 #endif
  385 }
  386 
  387 void InputData::flushRemaining()
  388 {
  389     InputItem *item = inputItems.head;
  390 
  391     while ( item != 0 ) {
  392         checkLastRef( item );
  393         item = item->next;
  394     }
  395 
  396     /* Flush remaining items. */
  397     while ( lastFlush != 0 ) {
  398         /* Flush out. */
  399         writeOutput( lastFlush );
  400 
  401         lastFlush = lastFlush->next;
  402     }
  403 }
  404 
  405 void InputData::makeTranslateOutputFileName()
  406 {
  407     origOutputFileName = outputFileName;
  408     outputFileName = fileNameFromStem( outputFileName, ".ri" );
  409     genOutputFileName = outputFileName;
  410 }
  411 
  412 #ifdef WITH_RAGEL_KELBT
  413 void InputData::parseKelbt()
  414 {
  415     /*
  416      * Ragel Parser from ragel 6.
  417      */
  418     ifstream *inFileStream;
  419     istream *inStream;
  420 
  421     /* Open the input file for reading. */
  422     assert( inputFileName != 0 );
  423     inFileStream = new ifstream( inputFileName );
  424     if ( ! inFileStream->is_open() )
  425         error() << "could not open " << inputFileName << " for reading" << endp;
  426     inStream = inFileStream;
  427 
  428     makeFirstInputItem();
  429 
  430     Scanner scanner( this, inputFileName, *inStream, 0, 0, 0, false );
  431 
  432     scanner.sectionPass = true;
  433     scanner.do_scan();
  434 
  435     inStream->clear();
  436     inStream->seekg( 0, std::ios::beg );
  437     curItem = inputItems.head;
  438     lastFlush = inputItems.head;
  439 
  440     scanner.sectionPass = false;
  441     scanner.do_scan();
  442 
  443     /* Finished, final check for errors.. */
  444     if ( errorCount > 0 )
  445         abortCompile( 1 );
  446 
  447     /* Bail on above error. */
  448     if ( errorCount > 0 )
  449         abortCompile( 1 );
  450     
  451     delete inFileStream;
  452 }
  453 
  454 void InputData::processKelbt()
  455 {
  456     /* With the kelbt version we implement two parse passes. The first is used
  457      * to identify the last time that any given machine is referenced by a
  458      * ragel section. In the second pass we parse, compile, and emit as far
  459      * forward as possible when we encounter the last reference to a machine.
  460      * */
  461     
  462     if ( generateDot ) {
  463         parseKelbt();
  464         terminateAllParsers();
  465         processDot();
  466     }
  467     else {
  468         createOutputStream();
  469         openOutput();
  470         parseKelbt();
  471         flushRemaining();
  472         closeOutput();
  473     }
  474 
  475     assert( errorCount == 0 );
  476 }
  477 #endif
  478 
  479 bool InputData::parseReduce()
  480 {
  481     /*
  482      * Colm-based reduction parser introduced in ragel 7. 
  483      */
  484 
  485     TopLevel *topLevel = new TopLevel( frontendSections, this, hostLang,
  486             minimizeLevel, minimizeOpt );
  487 
  488     /* Check input file. File is actually opened by colm code. We don't
  489      * need to perform the check if in libragel since it comes in via a
  490      * string. */
  491     if ( input == 0 ) {
  492         ifstream *inFile = new ifstream( inputFileName );
  493         if ( ! inFile->is_open() )
  494             error() << "could not open " << inputFileName << " for reading" << endp;
  495         delete inFile;
  496     }
  497 
  498     if ( errorCount )
  499         return false;
  500 
  501     makeFirstInputItem();
  502     
  503     curItem = inputItems.head;
  504     lastFlush = inputItems.head;
  505 
  506 
  507     topLevel->reduceFile( "rlparse", inputFileName );
  508 
  509     if ( errorCount )
  510         return false;
  511 
  512     bool success = topLevel->success;
  513 
  514     delete topLevel;
  515     return success;
  516 }
  517 
  518 bool InputData::processReduce()
  519 {
  520     if ( generateDot ) {
  521         parseReduce();
  522         processDot();
  523         return true;
  524     }
  525     else {
  526         createOutputStream();
  527         openOutput();
  528 
  529         bool success = parseReduce();
  530         if ( success )
  531             flushRemaining();
  532 
  533         closeOutput();
  534 
  535         if ( !success && outputFileName != 0 )
  536             unlink( outputFileName );
  537 
  538         return success;
  539     }
  540 }
  541 
  542 bool InputData::process()
  543 {
  544     switch ( frontend ) {
  545         case KelbtBased: {
  546 #ifdef WITH_RAGEL_KELBT
  547             processKelbt();
  548 #endif
  549             return true;
  550         }
  551         case ReduceBased: {
  552             return processReduce();
  553         }
  554     }
  555     return false;
  556 }
  557 
  558 /* Print a summary of the options. */
  559 void InputData::usage()
  560 {
  561     info() <<
  562 "usage: ragel [options] file\n"
  563 "general:\n"
  564 "   -h, -H, -?, --help   Print this usage and exit\n"
  565 "   -v, --version        Print version information and exit\n"
  566 "   -o <file>            Write output to <file>\n"
  567 "   -s                   Print some statistics and compilation info to stderr\n"
  568 "   -d                   Do not remove duplicates from action lists\n"
  569 "   -I <dir>             Add <dir> to the list of directories to search\n"
  570 "                        for included an imported files\n"
  571 "   --rlhc               Show the rlhc command used to compile\n"
  572 "   --save-temps         Do not delete intermediate file during compilation\n"
  573 "   --no-intermediate    Disable call to rlhc, leave behind intermediate\n"
  574 "error reporting format:\n"
  575 "   --error-format=gnu   file:line:column: message (default)\n"
  576 "   --error-format=msvc  file(line,column): message\n"
  577 "fsm minimization:\n"
  578 "   -n                   Do not perform minimization\n"
  579 "   -m                   Minimize at the end of the compilation\n"
  580 "   -l                   Minimize after most operations (default)\n"
  581 "   -e                   Minimize after every operation\n"
  582 "visualization:\n"
  583 "   -V                   Generate a dot file for Graphviz\n"
  584 "   -p                   Display printable characters on labels\n"
  585 "   -S <spec>            FSM specification to output (for graphviz output)\n"
  586 "   -M <machine>         Machine definition/instantiation to output (for\n"
  587 "                        graphviz output)\n"
  588 "host language binaries:\n"
  589 "   ragel, ragel-c       C, C++, Obj-C or Obj-C++\n"
  590 "                        All code styles supported.\n"
  591 "   ragel-asm            GNU AS, x86_64, System V ABI.\n"
  592 "                        Generated in a code style equivalent to -G2\n"
  593 "   ragel-d              D           All code styles supported\n"
  594 "   ragel-go             Go          All code styles supported\n"
  595 "   ragel-csharp         C#          -T0 -T1 -F0 -F1 -G0 -G1\n"
  596 "   ragel-java           Java        -T0 -T1 -F0 -F1\n"
  597 "   ragel-ruby           Ruby        -T0 -T1 -F0 -F1\n"
  598 "   ragel-ocaml          OCaml       -T0 -T1 -F0 -F1\n"
  599 "   ragel-rust           Rust        -T0 -T1 -F0 -F1\n"
  600 "   ragel-julia          Julia       -T0 -T1 -F0 -F1\n"
  601 "   ragel-crack          Crack       -T0 -T1 -F0 -F1\n"
  602 "   ragel-js             JavaScript  -T0 -T1 -F0 -F1\n"
  603 "line directives:\n"
  604 "   -L                   Inhibit writing of #line directives\n"
  605 "code style:\n"
  606 "   -T0                  Binary search (default)\n"
  607 "   -T1                  Binary search with expanded actions \n"
  608 "   -F0                  Flat table\n"
  609 "   -F1                  Flat table with expanded actions\n"
  610 "   -G0                  Switch-driven\n"
  611 "   -G1                  Switch-driven with expanded actions\n"
  612 "   -G2                  Goto-driven with expanded actions\n"
  613 "large machines:\n"
  614 "   --integral-tables    Use integers for table data (default)\n"
  615 "   --string-tables      Encode table data into strings for faster host lang\n"
  616 "                        compilation\n"
  617 "analysis:\n"
  618 "   --prior-interaction          Search for condition-based general repetitions\n"
  619 "                                that will not function properly due to state mod\n"
  620 "                                overlap and must be NFA reps. \n"
  621 "   --conds-depth=D              Search for high-cost conditions inside a prefix\n"
  622 "                                of the machine (depth D from start state).\n"
  623 "   --state-limit=L              Report fail if number of states exceeds this\n"
  624 "                                during compilation.\n"
  625 "   --breadth-check=E1,E2,..     Report breadth cost of named entry points and\n"
  626 "                                the start state.\n"
  627 "   --input-histogram=FN         Input char histogram for breadth check. If\n"
  628 "                                unspecified a flat histogram is used.\n"
  629 "testing:\n"
  630 "   --kelbt-frontend        Compile using original ragel + kelbt frontend\n"
  631 "                           Requires ragel be built with ragel + kelbt support\n"
  632 "   --colm-frontend         Compile using a colm-based recursive descent\n"
  633 "                           frontend\n"
  634 "   --reduce-frontend       Compile using a colm-based reducer (default)\n"
  635 "   --var-backend           Use the variable-based backend even if the host lang\n"
  636 "                           supports goto-based\n"
  637 "   --supported-host-langs  Show supported host languages by command line arg\n"
  638 "   --supported-frontends   Show supported frontends\n"
  639 "   --supported-backends    Show supported backends\n"
  640 "   --force-libragel        Cause mainline to behave like libragel\n"
  641     ;   
  642 
  643     abortCompile( 0 );
  644 }
  645 
  646 /* Print version information and exit. */
  647 void InputData::version()
  648 {
  649     info() << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
  650             "Copyright (c) 2001-2021 by Dr. Adrian D. Thurston et al." << endl;
  651     abortCompile( 0 );
  652 }
  653 
  654 void InputData::showFrontends()
  655 {
  656     ostream &out = info();
  657     out << "--colm-frontend";
  658     out << " --reduce-frontend";
  659 #ifdef WITH_RAGEL_KELBT
  660     out << " --kelbt-frontend";
  661 #endif
  662     out << endl;
  663     abortCompile( 0 );
  664 }
  665 
  666 void InputData::showBackends()
  667 {
  668     info() << 
  669         "--direct-backend --colm-backend" << endl;
  670     abortCompile( 0 );
  671 }
  672 
  673 InputLoc makeInputLoc( const char *fileName, int line, int col )
  674 {
  675     InputLoc loc( fileName, line, col );
  676     return loc;
  677 }
  678 
  /*
   * Write path to out with every backslash doubled, so that the result can
   * be placed inside a quoted line directive. All other characters are
   * copied unchanged.
   *
   * A null path writes nothing (previously it was dereferenced).
   */
  void escapeLineDirectivePath( std::ostream &out, char *path )
  {
      if ( path == 0 )
          return;

      for ( const char *pc = path; *pc != 0; pc++ ) {
          if ( *pc == '\\' )
              out << "\\\\";
          else
              out << *pc;
      }
  }
  688 
  689 /* Construct a new parameter checker with for paramSpec. */
  690 ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv )
  691 :
  692     state(noparam),
  693     argOffset(0),
  694     curArg(0),
  695     iCurArg(1),
  696     paramSpec(paramSpec), 
  697     argc(argc), 
  698     argv(argv)
  699 {
  700 }
  701 
  702 /* Check a single option. Returns the index of the next parameter.  Sets p to
  703  * the arg character if valid, 0 otherwise.  Sets parg to the parameter arg if
  704  * there is one, NULL otherwise. */
  705 bool ParamCheck::check()
  706 {
  707     bool requiresParam;
  708 
  709     if ( iCurArg >= argc ) {            /* Off the end of the arg list. */
  710         state = noparam;
  711         return false;
  712     }
  713 
  714     if ( argOffset != 0 && *argOffset == 0 ) {
  715         /* We are at the end of an arg string. */
  716         iCurArg += 1;
  717         if ( iCurArg >= argc ) {
  718             state = noparam;
  719             return false;
  720         }
  721         argOffset = 0;
  722     }
  723 
  724     if ( argOffset == 0 ) {
  725         /* Set the current arg. */
  726         curArg = argv[iCurArg];
  727 
  728         /* We are at the beginning of an arg string. */
  729         if ( argv[iCurArg] == 0 ||        /* Argv[iCurArg] is null. */
  730              argv[iCurArg][0] != '-' ||   /* Not a param. */
  731              argv[iCurArg][1] == 0 ) {    /* Only a dash. */
  732             parameter = 0;
  733             paramArg = 0;
  734 
  735             iCurArg += 1;
  736             state = noparam;
  737             return true;
  738         }
  739         argOffset = argv[iCurArg] + 1;
  740     }
  741 
  742     /* Get the arg char. */
  743     char argChar = *argOffset;
  744     
  745     /* Loop over all the parms and look for a match. */
  746     const char *pSpec = paramSpec;
  747     while ( *pSpec != 0 ) {
  748         char pSpecChar = *pSpec;
  749 
  750         /* If there is a ':' following the char then
  751          * it requires a parm.  If a parm is required
  752          * then move ahead two in the parmspec. Otherwise
  753          * move ahead one in the parm spec. */
  754         if ( pSpec[1] == ':' ) {
  755             requiresParam = true;
  756             pSpec += 2;
  757         }
  758         else {
  759             requiresParam = false;
  760             pSpec += 1;
  761         }
  762 
  763         /* Do we have a match. */
  764         if ( argChar == pSpecChar ) {
  765             if ( requiresParam ) {
  766                 if ( argOffset[1] == 0 ) {
  767                     /* The param must follow. */
  768                     if ( iCurArg + 1 == argc ) {
  769                         /* We are the last arg so there
  770                          * cannot be a parameter to it. */
  771                         parameter = argChar;
  772                         paramArg = 0;
  773                         iCurArg += 1;
  774                         argOffset = 0;
  775                         state = invalid;
  776                         return true;
  777                     }
  778                     else {
  779                         /* the parameter to the arg is the next arg. */
  780                         parameter = pSpecChar;
  781                         paramArg = argv[iCurArg + 1];
  782                         iCurArg += 2;
  783                         argOffset = 0;
  784                         state = match;
  785                         return true;
  786                     }
  787                 }
  788                 else {
  789                     /* The param for the arg is built in. */
  790                     parameter = pSpecChar;
  791                     paramArg = argOffset + 1;
  792                     iCurArg += 1;
  793                     argOffset = 0;
  794                     state = match;
  795                     return true;
  796                 }
  797             }
  798             else {
  799                 /* Good, we matched the parm and no
  800                  * arg is required. */
  801                 parameter = pSpecChar;
  802                 paramArg = 0;
  803                 argOffset += 1;
  804                 state = match;
  805                 return true;
  806             }
  807         }
  808     }
  809 
  810     /* We did not find a match. Bad Argument. */
  811     parameter = argChar;
  812     paramArg = 0;
  813     argOffset += 1;
  814     state = invalid;
  815     return true;
  816 }
  817 
  818 
  819 void InputData::parseArgs( int argc, const char **argv )
  820 {
  821     ParamCheck pc( "o:dnmleabjkS:M:I:vHh?-:sT:F:W:G:LpV", argc, argv );
  822 
  823     /* Decide if we were invoked using a path variable, or with an explicit path. */
  824     const char *lastSlash = strrchr( argv[0], '/' );
  825     if ( lastSlash == 0 ) {
  826         /* Default to the binary install location. */
  827         dirName = BINDIR;
  828     }
  829     else {
  830         /* Compute dirName from argv0. */
  831         dirName = string( argv[0], lastSlash - argv[0] );
  832     }
  833 
  834     /* FIXME: Need to check code styles VS language. */
  835 
  836     while ( pc.check() ) {
  837         switch ( pc.state ) {
  838         case ParamCheck::match:
  839             switch ( pc.parameter ) {
  840             case 'V':
  841                 generateDot = true;
  842                 break;
  843 
  844             /* Output. */
  845             case 'o':
  846                 if ( *pc.paramArg == 0 )
  847                     error() << "a zero length output file name was given" << endl;
  848                 else if ( outputFileName != 0 )
  849                     error() << "more than one output file name was given" << endl;
  850                 else {
  851                     /* Ok, remember the output file name. */
  852                     outputFileName = new char[strlen(pc.paramArg)+1];
  853                     strcpy( (char*)outputFileName, pc.paramArg );
  854                 }
  855                 break;
  856 
  857             /* Flag for turning off duplicate action removal. */
  858             case 'd':
  859                 wantDupsRemoved = false;
  860                 break;
  861 
  862             /* Minimization, mostly hidden options. */
  863             case 'n':
  864                 minimizeOpt = MinimizeNone;
  865                 break;
  866             case 'm':
  867                 minimizeOpt = MinimizeEnd;
  868                 break;
  869             case 'l':
  870                 minimizeOpt = MinimizeMostOps;
  871                 break;
  872             case 'e':
  873                 minimizeOpt = MinimizeEveryOp;
  874                 break;
  875             case 'a':
  876             #ifdef TO_UPGRADE_CONDS
  877                 minimizeLevel = MinimizeApprox;
  878             #else
  879                 error() << "minimize approx (-a) unsupported in this version" << endp;
  880             #endif
  881                 break;
  882             case 'b':
  883             #ifdef TO_UPGRADE_CONDS
  884                 minimizeLevel = MinimizeStable;
  885             #else
  886                 error() << "minimize stable (-b) unsupported in this version" << endp;
  887             #endif
  888                 break;
  889             case 'j':
  890                 minimizeLevel = MinimizePartition1;
  891                 break;
  892             case 'k':
  893                 minimizeLevel = MinimizePartition2;
  894                 break;
  895 
  896             /* Machine spec. */
  897             case 'S':
  898                 if ( *pc.paramArg == 0 )
  899                     error() << "please specify an argument to -S" << endl;
  900                 else if ( machineSpec != 0 )
  901                     error() << "more than one -S argument was given" << endl;
  902                 else {
  903                     /* Ok, remember the path to the machine to generate. */
  904                     machineSpec = pc.paramArg;
  905                 }
  906                 break;
  907 
  908             /* Machine path. */
  909             case 'M':
  910                 if ( *pc.paramArg == 0 )
  911                     error() << "please specify an argument to -M" << endl;
  912                 else if ( machineName != 0 )
  913                     error() << "more than one -M argument was given" << endl;
  914                 else {
  915                     /* Ok, remember the machine name to generate. */
  916                     machineName = pc.paramArg;
  917                 }
  918                 break;
  919 
  920             case 'I':
  921                 if ( *pc.paramArg == 0 )
  922                     error() << "please specify an argument to -I" << endl;
  923                 else {
  924                     includePaths.append( pc.paramArg );
  925                 }
  926                 break;
  927 
  928             /* Version and help. */
  929             case 'v':
  930                 version();
  931                 break;
  932             case 'H': case 'h': case '?':
  933                 usage();
  934                 break;
  935             case 's':
  936                 printStatistics = true;
  937                 break;
  938             case '-': {
  939                 char *arg = strdup( pc.paramArg );
  940                 char *eq = strchr( arg, '=' );
  941 
  942                 if ( eq != 0 )
  943                     *eq++ = 0;
  944 
  945                 if ( strcmp( arg, "help" ) == 0 )
  946                     usage();
  947                 else if ( strcmp( arg, "version" ) == 0 )
  948                     version();
  949                 else if ( strcmp( arg, "error-format" ) == 0 ) {
  950                     if ( eq == 0 )
  951                         error() << "expecting '=value' for error-format" << endl;
  952                     else if ( strcmp( eq, "gnu" ) == 0 )
  953                         errorFormat = ErrorFormatGNU;
  954                     else if ( strcmp( eq, "msvc" ) == 0 )
  955                         errorFormat = ErrorFormatMSVC;
  956                     else
  957                         error() << "invalid value for error-format" << endl;
  958                 }
  959                 else if ( strcmp( arg, "rlhc" ) == 0 )
  960                     rlhc = true;
  961                 else if ( strcmp( arg, "no-intermediate" ) == 0 )
  962                     noIntermediate = true;
  963 #ifdef WITH_RAGEL_KELBT
  964                 else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) {
  965                     frontend = KelbtBased;
  966                     frontendSpecified = true;
  967                 }
  968 #else
  969                 else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) {
  970                     error() << "--kelbt-frontend specified but, "
  971                             "ragel not built with ragel+kelbt support" << endp;
  972                 }
  973 #endif
  974                 else if ( strcmp( arg, "reduce-frontend" ) == 0 ) {
  975                     frontend = ReduceBased;
  976                     frontendSpecified = true;
  977                 }
  978                 else if ( strcmp( arg, "string-tables" ) == 0 )
  979                     stringTables = true;
  980                 else if ( strcmp( arg, "integral-tables" ) == 0 )
  981                     stringTables = false;
  982                 else if ( strcmp( arg, "supported-frontends" ) == 0 )
  983                     showFrontends();
  984                 else if ( strcmp( arg, "supported-backends" ) == 0 )
  985                     showBackends();
  986                 else if ( strcmp( arg, "save-temps" ) == 0 )
  987                     saveTemps = true;
  988 
  989                 else if ( strcmp( arg, "prior-interaction" ) == 0 )
  990                     checkPriorInteraction = true;
  991                 else if ( strcmp( arg, "conds-depth" ) == 0 )
  992                     condsCheckDepth = strtol( eq, 0, 10 );
  993                 else if ( strcmp( arg, "state-limit" ) == 0 )
  994                     stateLimit = strtol( eq, 0, 10 );
  995 
  996                 else if ( strcmp( arg, "breadth-check" ) == 0 ) {
  997                     char *ptr = 0;
  998                     while ( true ) {
  999                         char *label = strtok_r( eq, ",", &ptr );
 1000                         eq = NULL;
 1001                         if ( label == NULL )
 1002                             break;
 1003                         breadthLabels.append( strdup( label ) );
 1004                     }
 1005                     checkBreadth = true;
 1006                 }
 1007                 else if ( strcmp( arg, "input-histogram" ) == 0 )
 1008                     histogramFn = strdup(eq);
 1009                 else if ( strcmp( arg, "var-backend" ) == 0 )
 1010                     forceVar = true;
 1011                 else if ( strcmp( arg, "no-fork" ) == 0 )
 1012                     noFork = true;
 1013                 else {
 1014                     error() << "--" << pc.paramArg << 
 1015                             " is an invalid argument" << endl;
 1016                 }
 1017                 free( arg );
 1018                 break;
 1019             }
 1020 
 1021             /* Passthrough args. */
 1022             case 'T': 
 1023                 if ( pc.paramArg[0] == '0' )
 1024                     codeStyle = GenBinaryLoop;
 1025                 else if ( pc.paramArg[0] == '1' )
 1026                     codeStyle = GenBinaryExp;
 1027                 else {
 1028                     error() << "-T" << pc.paramArg[0] << 
 1029                             " is an invalid argument" << endl;
 1030                     abortCompile( 1 );
 1031                 }
 1032                 break;
 1033             case 'F': 
 1034                 if ( pc.paramArg[0] == '0' )
 1035                     codeStyle = GenFlatLoop;
 1036                 else if ( pc.paramArg[0] == '1' )
 1037                     codeStyle = GenFlatExp;
 1038                 else {
 1039                     error() << "-F" << pc.paramArg[0] << 
 1040                             " is an invalid argument" << endl;
 1041                     abortCompile( 1 );
 1042                 }
 1043                 break;
 1044             case 'G': 
 1045                 if ( pc.paramArg[0] == '0' )
 1046                     codeStyle = GenGotoLoop;
 1047                 else if ( pc.paramArg[0] == '1' )
 1048                     codeStyle = GenGotoExp;
 1049                 else if ( pc.paramArg[0] == '2' )
 1050                     codeStyle = GenIpGoto;
 1051                 else if ( pc.paramArg[0] == 'T' && pc.paramArg[1] == '2' ) {
 1052                     codeStyle = GenIpGoto;
 1053                     maxTransitions = 32;
 1054                 } else {
 1055                     error() << "-G" << pc.paramArg[0] << 
 1056                             " is an invalid argument" << endl;
 1057                     abortCompile( 1 );
 1058                 }
 1059                 break;
 1060             case 'W': 
 1061                 if ( pc.paramArg[0] == '0' )
 1062                     codeStyle = GenSwitchLoop;
 1063                 else if ( pc.paramArg[0] == '1' )
 1064                     codeStyle = GenSwitchExp;
 1065                 else {
 1066                     error() << "-G" << pc.paramArg[0] << 
 1067                             " is an invalid argument" << endl;
 1068                     abortCompile( 1 );
 1069                 }
 1070                 break;
 1071 
 1072             case 'p':
 1073                 displayPrintables = true;
 1074                 break;
 1075 
 1076             case 'L':
 1077                 noLineDirectives = true;
 1078                 break;
 1079             }
 1080             break;
 1081 
 1082         case ParamCheck::invalid:
 1083             error() << "-" << pc.parameter << " is an invalid argument" << endl;
 1084             break;
 1085 
 1086         case ParamCheck::noparam:
 1087             /* It is interpreted as an input file. */
 1088             if ( *pc.curArg == 0 )
 1089                 error() << "a zero length input file name was given" << endl;
 1090             else if ( inputFileName != 0 )
 1091                 error() << "more than one input file name was given" << endl;
 1092             else {
 1093                 /* OK, Remember the filename. */
 1094                 inputFileName = pc.curArg;
 1095             }
 1096             break;
 1097         }
 1098     }
 1099 }
 1100 
 1101 void InputData::loadHistogram()
 1102 {
 1103     const int alphsize = 256;
 1104 
 1105     /* Init a default. */
 1106     histogram = new double[alphsize];
 1107     ifstream h( histogramFn );
 1108     if ( !h.is_open() )
 1109         error() << "histogram read: failed to open file: " << histogramFn << endp;
 1110 
 1111     int i = 0;
 1112     double value;
 1113     while ( true ) {
 1114         if ( h >> value ) {
 1115             if ( i >= alphsize ) {
 1116                 /* Too many items. */
 1117                 error() << "histogram read: too many histogram values,"
 1118                         " expecting " << alphsize << " (for char alphabet)" << endp;
 1119             }
 1120             histogram[i] = value;
 1121             i++;
 1122         }
 1123         else {
 1124             /* Read failure. */
 1125             if ( h.eof() ) {
 1126                 if ( i < alphsize ) {
 1127                     error() << "histogram read: fell short of " <<
 1128                             alphsize << " items" << endp;
 1129                 }
 1130                 break;
 1131             }
 1132             else {
 1133                 error() << "histogram read: error at item " << i << endp;
 1134             }
 1135         }
 1136     }
 1137 }
 1138 
 1139 void InputData::defaultHistogram()
 1140 {
 1141     /* Flat histogram. */
 1142     const int alphsize = 256;
 1143     histogram = new double[alphsize];
 1144     for ( int i = 0; i < alphsize; i++ ) {
 1145         histogram[i] = 1.0 / (double)alphsize;
 1146     }
 1147 }
 1148 
 1149 void InputData::checkArgs()
 1150 {
 1151     /* Require an input file. If we use standard in then we won't have a file
 1152      * name on which to base the output. */
 1153     if ( inputFileName == 0 )
 1154         error() << "no input file given" << endl;
 1155 
 1156     /* Bail on argument processing errors. */
 1157     if ( errorCount > 0 )
 1158         abortCompile( 1 );
 1159 
 1160     /* Make sure we are not writing to the same file as the input file. */
 1161     if ( inputFileName != 0 && outputFileName != 0 && 
 1162             strcmp( inputFileName, outputFileName  ) == 0 )
 1163     {
 1164         error() << "output file \"" << outputFileName  << 
 1165                 "\" is the same as the input file" << endp;
 1166     }
 1167 
 1168     if ( !frontendSpecified )
 1169         frontend = ReduceBased;
 1170 
 1171     if ( checkBreadth ) {
 1172         if ( histogramFn != 0 )
 1173             loadHistogram();
 1174         else
 1175             defaultHistogram();
 1176     }
 1177 }
 1178 
 1179 char *InputData::readInput( const char *inputFileName )
 1180 {
 1181     struct stat st;
 1182     int res = stat( inputFileName, &st );
 1183     if ( res != 0 ) {
 1184         error() << inputFileName << ": stat failed: " << strerror(errno) << endl;
 1185         return 0;
 1186     }
 1187 
 1188     std::ifstream in( inputFileName );
 1189     if ( !in.is_open() ) {
 1190         error() << inputFileName << ": could not open in force-libragel mode";
 1191         return 0;
 1192     }
 1193 
 1194     char *input = new char[st.st_size+1];
 1195     in.read( input, st.st_size );
 1196     if ( in.gcount() != st.st_size ) {
 1197         error() << inputFileName << ": could not read in force-libragel mode";
 1198         delete[] input;
 1199         return 0;
 1200     }
 1201     input[st.st_size] = 0;
 1202 
 1203     return input;
 1204 }
 1205 
 1206 int InputData::runFrontend( int argc, const char **argv )
 1207 {
 1208     if ( !process() )
 1209         return -1;
 1210     return 0;
 1211 }
 1212 
 1213 int InputData::runRlhc( int argc, const char **argv )
 1214 {
 1215     struct colm_program *prg;
 1216     int exit_status;
 1217 
 1218     prg = colm_new_program( rlhcSections );
 1219     colm_set_debug( prg, 0 );
 1220     colm_run_program( prg, argc, argv );
 1221     exit_status = colm_delete_program( prg );
 1222     return exit_status;
 1223 }
 1224 
 1225 /* Run a job (frontend or backend). If we want forks then we return the result
 1226  * via the process's exit code. otherwise it comes back on the stack. */
 1227 int InputData::runJob( const char *what, IdProcess idProcess, int argc, const char **argv )
 1228 {
 1229 #if defined(HAVE_SYS_WAIT_H)
 1230     if ( !noFork ) {
 1231         pid_t pid = fork();
 1232 
 1233         if ( pid == 0 ) {
 1234             int es = (this->*idProcess)( argc, argv );
 1235             exit( es );
 1236         }
 1237 
 1238         int status = 0;
 1239         waitpid( pid, &status, 0 );
 1240         if ( WIFSIGNALED(status) ) {
 1241             error() << what << " stopped by signal: " << WTERMSIG(status) << std::endl;
 1242             return -1;
 1243         }
 1244 
 1245         return WEXITSTATUS( status );
 1246     }
 1247 #endif
 1248     return (this->*idProcess)( argc, argv );
 1249 }
 1250 
 1251 int InputData::main( int argc, const char **argv )
 1252 {
 1253     int code = 0;
 1254     try {
 1255         parseArgs( argc, argv );
 1256         checkArgs();
 1257         if ( !generateDot )
 1258             makeDefaultFileName();
 1259 
 1260         if ( !process() )
 1261             abortCompile( 1 );
 1262     }
 1263     catch ( const AbortCompile &ac ) {
 1264         code = ac.code;
 1265     }
 1266 
 1267     return code;
 1268 }
 1269 
 1270 int InputData::rlhcMain( int argc, const char **argv )
 1271 {
 1272     int code = 0;
 1273     try {
 1274         parseArgs( argc, argv );
 1275         checkArgs();
 1276         makeDefaultFileName();
 1277         makeTranslateOutputFileName();
 1278 
 1279         int es = runJob( "frontend", &InputData::runFrontend, 0, 0 );
 1280 
 1281         if ( es != 0 )
 1282             return es;
 1283 
 1284         /* rlhc <input> <output> */
 1285         const char *_argv[] = { "rlhc",
 1286                 genOutputFileName.c_str(),
 1287                 origOutputFileName.c_str(), 0 };
 1288 
 1289         return runJob( "rlhc", &InputData::runRlhc, 3, _argv );
 1290     }
 1291     catch ( const AbortCompile &ac ) {
 1292         code = ac.code;
 1293     }
 1294     return code;
 1295 }