"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.42/pcre_scanner.h" (31 Jan 2014, 6600 Bytes) of package /linux/misc/pcre-8.42.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcre_scanner.h", see the Fossies "Dox" file reference documentation.

    1 // Copyright (c) 2005, Google Inc.
    2 // All rights reserved.
    3 //
    4 // Redistribution and use in source and binary forms, with or without
    5 // modification, are permitted provided that the following conditions are
    6 // met:
    7 //
    8 //     * Redistributions of source code must retain the above copyright
    9 // notice, this list of conditions and the following disclaimer.
   10 //     * Redistributions in binary form must reproduce the above
   11 // copyright notice, this list of conditions and the following disclaimer
   12 // in the documentation and/or other materials provided with the
   13 // distribution.
   14 //     * Neither the name of Google Inc. nor the names of its
   15 // contributors may be used to endorse or promote products derived from
   16 // this software without specific prior written permission.
   17 //
   18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29 //
   30 // Author: Sanjay Ghemawat
   31 //
   32 // Regular-expression based scanner for parsing an input stream.
   33 //
   34 // Example 1: parse a sequence of "var = number" entries from input:
   35 //
   36 //      Scanner scanner(input);
   37 //      string var;
   38 //      int number;
   39 //      scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
   40 //      while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
   41 //        ...;
   42 //      }
   43 
   44 #ifndef _PCRE_SCANNER_H
   45 #define _PCRE_SCANNER_H
   46 
   47 #include <assert.h>
   48 #include <string>
   49 #include <vector>
   50 
   51 #include <pcrecpp.h>
   52 #include <pcre_stringpiece.h>
   53 
   54 namespace pcrecpp {
   55 
   56 class PCRECPP_EXP_DEFN Scanner {
   57  public:
   58   Scanner();
   59   explicit Scanner(const std::string& input);
   60   ~Scanner();
   61 
   62   // Return current line number.  The returned line-number is
   63   // one-based.  I.e. it returns 1 + the number of consumed newlines.
   64   //
   65   // Note: this method may be slow.  It may take time proportional to
   66   // the size of the input.
   67   int LineNumber() const;
   68 
   69   // Return the byte-offset that the scanner is looking in the
   70   // input data;
   71   int Offset() const;
   72 
   73   // Return true iff the start of the remaining input matches "re"
   74   bool LookingAt(const RE& re) const;
   75 
   76   // Return true iff all of the following are true
   77   //    a. the start of the remaining input matches "re",
   78   //    b. if any arguments are supplied, matched sub-patterns can be
   79   //       parsed and stored into the arguments.
   80   // If it returns true, it skips over the matched input and any
   81   // following input that matches the "skip" regular expression.
   82   bool Consume(const RE& re,
   83                const Arg& arg0 = RE::no_arg,
   84                const Arg& arg1 = RE::no_arg,
   85                const Arg& arg2 = RE::no_arg
   86                // TODO: Allow more arguments?
   87                );
   88 
   89   // Set the "skip" regular expression.  If after consuming some data,
   90   // a prefix of the input matches this RE, it is automatically
   91   // skipped.  For example, a programming language scanner would use
   92   // a skip RE that matches white space and comments.
   93   //
   94   //    scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
   95   //
   96   // Skipping repeats as long as it succeeds.  We used to let people do
   97   // this by writing "(...)*" in the regular expression, but that added
   98   // up to lots of recursive calls within the pcre library, so now we
   99   // control repetition explicitly via the function call API.
  100   //
  101   // You can pass NULL for "re" if you do not want any data to be skipped.
  102   void Skip(const char* re);   // DEPRECATED; does *not* repeat
  103   void SetSkipExpression(const char* re);
  104 
  105   // Temporarily pause "skip"ing. This
  106   //   Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
  107   // is similar to
  108   //   Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
  109   // but avoids creating/deleting new RE objects.
  110   void DisableSkip();
  111 
  112   // Reenable previously paused skipping.  Any prefix of the input
  113   // that matches the skip pattern is immediately dropped.
  114   void EnableSkip();
  115 
  116   /***** Special wrappers around SetSkip() for some common idioms *****/
  117 
  118   // Arranges to skip whitespace, C comments, C++ comments.
  119   // The overall RE is a disjunction of the following REs:
  120   //    \\s                     whitespace
  121   //    //.*\n                  C++ comment
  122   //    /[*](.|\n)*?[*]/        C comment (x*? means minimal repetitions of x)
  123   // We get repetition via the semantics of SetSkipExpression, not by using *
  124   void SkipCXXComments() {
  125     SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
  126   }
  127 
  128   void set_save_comments(bool comments) {
  129     save_comments_ = comments;
  130   }
  131 
  132   bool save_comments() {
  133     return save_comments_;
  134   }
  135 
  136   // Append to vector ranges the comments found in the
  137   // byte range [start,end] (inclusive) of the input data.
  138   // Only comments that were extracted entirely within that
  139   // range are returned: no range splitting of atomically-extracted
  140   // comments is performed.
  141   void GetComments(int start, int end, std::vector<StringPiece> *ranges);
  142 
  143   // Append to vector ranges the comments added
  144   // since the last time this was called. This
  145   // functionality is provided for efficiency when
  146   // interleaving scanning with parsing.
  147   void GetNextComments(std::vector<StringPiece> *ranges);
  148 
  149  private:
  150   std::string   data_;          // All the input data
  151   StringPiece   input_;         // Unprocessed input
  152   RE*           skip_;          // If non-NULL, RE for skipping input
  153   bool          should_skip_;   // If true, use skip_
  154   bool          skip_repeat_;   // If true, repeat skip_ as long as it works
  155   bool          save_comments_; // If true, aggregate the skip expression
  156 
  157   // the skipped comments
  158   // TODO: later consider requiring that the StringPieces be added
  159   // in order by their start position
  160   std::vector<StringPiece> *comments_;
  161 
  162   // the offset into comments_ that has been returned by GetNextComments
  163   int           comments_offset_;
  164 
  165   // helper function to consume *skip_ and honour
  166   // save_comments_
  167   void ConsumeSkip();
  168 };
  169 
  170 }   // namespace pcrecpp
  171 
  172 #endif /* _PCRE_SCANNER_H */