"Fossies" - the Fresh Open Source Software Archive

Member "UXP-2019.06.08/js/src/frontend/TokenStream.h" (8 Jun 2019, 39124 Bytes) of package /linux/www/UXP-2019.06.08.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. See also the latest Fossies "Diffs" side-by-side code changes report for "TokenStream.h": 2019.03.27_vs_2019.06.08.

    1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
    2  * vim: set ts=8 sts=4 et sw=4 tw=99:
    3  * This Source Code Form is subject to the terms of the Mozilla Public
    4  * License, v. 2.0. If a copy of the MPL was not distributed with this
    5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    6 
    7 #ifndef frontend_TokenStream_h
    8 #define frontend_TokenStream_h
    9 
   10 // JS lexical scanner interface.
   11 
   12 #include "mozilla/ArrayUtils.h"
   13 #include "mozilla/Assertions.h"
   14 #include "mozilla/Attributes.h"
   15 #include "mozilla/DebugOnly.h"
   16 #include "mozilla/PodOperations.h"
   17 
   18 #include <stdarg.h>
   19 #include <stddef.h>
   20 #include <stdio.h>
   21 
   22 #include "jscntxt.h"
   23 #include "jspubtd.h"
   24 
   25 #include "frontend/TokenKind.h"
   26 #include "js/UniquePtr.h"
   27 #include "js/Vector.h"
   28 #include "vm/RegExpObject.h"
   29 
   30 struct KeywordInfo;
   31 
   32 namespace js {
   33 namespace frontend {
   34 
   35 class AutoAwaitIsKeyword;
   36 
   37 struct TokenPos {
   38     uint32_t    begin;  // Offset of the token's first char.
   39     uint32_t    end;    // Offset of 1 past the token's last char.
   40 
   41     TokenPos() {}
   42     TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
   43 
   44     // Return a TokenPos that covers left, right, and anything in between.
   45     static TokenPos box(const TokenPos& left, const TokenPos& right) {
   46         MOZ_ASSERT(left.begin <= left.end);
   47         MOZ_ASSERT(left.end <= right.begin);
   48         MOZ_ASSERT(right.begin <= right.end);
   49         return TokenPos(left.begin, right.end);
   50     }
   51 
   52     bool operator==(const TokenPos& bpos) const {
   53         return begin == bpos.begin && end == bpos.end;
   54     }
   55 
   56     bool operator!=(const TokenPos& bpos) const {
   57         return begin != bpos.begin || end != bpos.end;
   58     }
   59 
   60     bool operator <(const TokenPos& bpos) const {
   61         return begin < bpos.begin;
   62     }
   63 
   64     bool operator <=(const TokenPos& bpos) const {
   65         return begin <= bpos.begin;
   66     }
   67 
   68     bool operator >(const TokenPos& bpos) const {
   69         return !(*this <= bpos);
   70     }
   71 
   72     bool operator >=(const TokenPos& bpos) const {
   73         return !(*this < bpos);
   74     }
   75 
   76     bool encloses(const TokenPos& pos) const {
   77         return begin <= pos.begin && pos.end <= end;
   78     }
   79 };
   80 
    81 enum DecimalPoint { NoDecimal = false, HasDecimal = true };  // stored per TOK_NUMBER token
   82 
   83 class TokenStream;
   84 
    85 struct Token
       // One lexical token: its kind (|type|), its source range (|pos|) and a
       // kind-dependent payload kept in the union |u|.  The mutators and accessors
       // below assert that |type| matches the payload member they touch.
    86 {
    87   private:
    88     // Sometimes the parser needs to inform the tokenizer to interpret
    89     // subsequent text in a particular manner: for example, to tokenize a
    90     // keyword as an identifier, not as the actual keyword, on the right-hand
    91     // side of a dotted property access.  Such information is communicated to
    92     // the tokenizer as a Modifier when getting the next token.
    93     //
    94     // Ideally this definition would reside in TokenStream as that's the real
    95     // user, but the debugging-use of it here causes a cyclic dependency (and
    96     // C++ provides no way to forward-declare an enum inside a class).  So
    97     // define it here, then typedef it into TokenStream with static consts to
    98     // bring the initializers into scope.
    99     enum Modifier
   100     {
   101         // Normal operation.
   102         None,
   103 
   104         // Looking for an operand, not an operator.  In practice, this means
   105         // that when '/' is seen, we look for a regexp instead of just returning
   106         // TOK_DIV.
   107         Operand,
   108 
   109         // Treat keywords as names by returning TOK_NAME.
   110         KeywordIsName,
   111 
   112         // Treat subsequent characters as the tail of a template literal, after
   113         // a template substitution, beginning with a "}", continuing with zero
   114         // or more template literal characters, and ending with either "${" or
   115         // the end of the template literal.  For example:
   116         //
   117         //   var entity = "world";
   118         //   var s = `Hello ${entity}!`;
   119         //                          ^ TemplateTail context
   120         TemplateTail,
   121     };
           // Recorded on a lookahead token (DEBUG builds only) to whitelist one
           // specific mismatch between the modifier it was scanned with and the
           // modifier it is later requested with.
   122     enum ModifierException
   123     {
   124         NoException,
   125 
   126         // Used in following 2 cases:
   127         // a) After |yield| we look for a token on the same line that starts an
   128         // expression (Operand): |yield <expr>|.  If no token is found, the
   129         // |yield| stands alone, and the next token on a subsequent line must
   130         // be: a comma continuing a comma expression, a semicolon terminating
   131         // the statement that ended with |yield|, or the start of another
   132         // statement (possibly an expression statement).  The comma/semicolon
   133         // cases are gotten as operators (None), contrasting with Operand
   134         // earlier.
   135         // b) After an arrow function with a block body in an expression
   136         // statement, the next token must be: a colon in a conditional
   137         // expression, a comma continuing a comma expression, a semicolon
   138         // terminating the statement, or the token on a subsequent line that is
   139         // the start of another statement (possibly an expression statement).
   140         // Colon is gotten as operator (None), and it should only be gotten in
   141         // conditional expression and missing it results in SyntaxError.
   142         // Comma/semicolon cases are also gotten as operators (None), and 4th
   143         // case is gotten after them.  If no comma/semicolon found but EOL,
   144         // the next token should be gotten as operand in 4th case (especially if
   145         // '/' is the first character).  So we should peek the token as
   146         // operand before try getting colon/comma/semicolon.
   147         // See also the comment in Parser::assignExpr().
   148         NoneIsOperand,
   149 
   150         // If a semicolon is inserted automatically, the next token is already
   151         // gotten with None, but we expect Operand.
   152         OperandIsNone,
   153 
   154         // If name of method definition is `get` or `set`, the next token is
   155         // already gotten with KeywordIsName, but we expect None.
   156         NoneIsKeywordIsName,
   157     };
           // TokenStream re-exports the two private enums above as its own
           // typedefs and constants, hence the friendship.
   158     friend class TokenStream;
   159 
   160   public:
   161     TokenKind           type;           // char value or above enumerator
   162     TokenPos            pos;            // token position in file
           // Payload.  The members are private to the union and reachable only
           // through Token's own methods (see the friend declaration inside).
   163     union {
   164       private:
   165         friend struct Token;
   166         PropertyName*   name;          // non-numeric atom
   167         JSAtom*         atom;          // potentially-numeric atom
   168         struct {
   169             double      value;          // floating point number
   170             DecimalPoint decimalPoint;  // literal contains '.'
   171         } number;
   172         RegExpFlag      reflags;        // regexp flags; use tokenbuf to access
   173                                         //   regexp chars
   174     } u;
   175 #ifdef DEBUG
   176     Modifier modifier;                  // Modifier used to get this token
   177     ModifierException modifierException; // Exception for this modifier
   178 #endif
   179 
   180     // Mutators
           //
           // Each mutator asserts that |type| already holds the kind matching
           // the payload being stored, so |type| has to be set first.
   181 
   182     void setName(PropertyName* name) {
   183         MOZ_ASSERT(type == TOK_NAME);
   184         u.name = name;
   185     }
   186 
   187     void setAtom(JSAtom* atom) {
   188         MOZ_ASSERT(type == TOK_STRING ||
   189                    type == TOK_TEMPLATE_HEAD ||
   190                    type == TOK_NO_SUBS_TEMPLATE);
   191         u.atom = atom;
   192     }
   193 
   194     void setRegExpFlags(js::RegExpFlag flags) {
   195         MOZ_ASSERT(type == TOK_REGEXP);
               // Reject any bit outside AllFlags.
   196         MOZ_ASSERT((flags & AllFlags) == flags);
   197         u.reflags = flags;
   198     }
   199 
   200     void setNumber(double n, DecimalPoint decimalPoint) {
   201         MOZ_ASSERT(type == TOK_NUMBER);
   202         u.number.value = n;
   203         u.number.decimalPoint = decimalPoint;
   204     }
   205 
   206     // Type-safe accessors
   207 
   208     PropertyName* name() const {
   209         MOZ_ASSERT(type == TOK_NAME);
   210         return u.name->JSAtom::asPropertyName(); // poor-man's type verification
   211     }
   212 
           // Compares the atom's length with the token's source extent; a
           // mismatch is reported as "contains an escape".  Only valid for
           // TOK_NAME tokens because it goes through name().
   213     bool nameContainsEscape() const {
   214         PropertyName* n = name();
   215         return pos.begin + n->length() != pos.end;
   216     }
   217 
   218     JSAtom* atom() const {
   219         MOZ_ASSERT(type == TOK_STRING ||
   220                    type == TOK_TEMPLATE_HEAD ||
   221                    type == TOK_NO_SUBS_TEMPLATE);
   222         return u.atom;
   223     }
   224 
   225     js::RegExpFlag regExpFlags() const {
   226         MOZ_ASSERT(type == TOK_REGEXP);
   227         MOZ_ASSERT((u.reflags & AllFlags) == u.reflags);
   228         return u.reflags;
   229     }
   230 
   231     double number() const {
   232         MOZ_ASSERT(type == TOK_NUMBER);
   233         return u.number.value;
   234     }
   235 
   236     DecimalPoint decimalPoint() const {
   237         MOZ_ASSERT(type == TOK_NUMBER);
   238         return u.number.decimalPoint;
   239     }
   240 };
  241 
   242 class CompileError : public JSErrorReport {
       // A JSErrorReport that adds throwError().  Only the declaration is visible
       // in this part of the header.
   243 public:
           // NOTE(review): presumably raises this report on |cx| -- confirm
           // against the definition.
   244     void throwError(JSContext* cx);
   245 };
  246 
  247 // Ideally, tokenizing would be entirely independent of context.  But the
  248 // strict mode flag, which is in SharedContext, affects tokenizing, and
  249 // TokenStream needs to see it.
  250 //
  251 // This class is a tiny back-channel from TokenStream to the strict mode flag
  252 // that avoids exposing the rest of SharedContext to TokenStream.
  253 //
   254 class StrictModeGetter {
   255   public:
           // Pure virtual query for the current strict-mode state.
           // TokenStream::strictMode() forwards to it when a getter is set.
   256     virtual bool strictMode() = 0;
   257 };
  258 
  259 // TokenStream is the lexical scanner for Javascript source text.
  260 //
  261 // It takes a buffer of char16_t characters and linearly scans it into |Token|s.
  262 // Internally the class uses a four element circular buffer |tokens| of
  263 // |Token|s. As an index for |tokens|, the member |cursor| points to the
  264 // current token.
  265 // Calls to getToken() increase |cursor| by one and return the new current
  266 // token. If a TokenStream was just created, the current token is initialized
  267 // with random data (i.e. not initialized). It is therefore important that
  268 // one of the first four member functions listed below is called first.
  269 // The circular buffer lets us go back up to two tokens from the last
  270 // scanned token. Internally, the relative number of backward steps that were
  271 // taken (via ungetToken()) after the last token was scanned is stored in
  272 // |lookahead|.
  273 //
  274 // The following table lists in which situations it is safe to call each listed
  275 // function. No checks are made by the functions in non-debug builds.
  276 //
  277 // Function Name     | Precondition; changes to |lookahead|
  278 // ------------------+---------------------------------------------------------
  279 // getToken          | none; if |lookahead > 0| then |lookahead--|
  280 // peekToken         | none; if |lookahead == 0| then |lookahead == 1|
  281 // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
  282 // matchToken        | none; if |lookahead > 0| and the match succeeds then
  283 //                   |       |lookahead--|
   284 // consumeKnownToken | asserts hasLookahead(); if |lookahead > 0| then |lookahead--|
  285 // ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
  286 //
  287 // The behavior of the token scanning process (see getTokenInternal()) can be
  288 // modified by calling one of the first four above listed member functions with
  289 // an optional argument of type Modifier.  However, the modifier will be
  290 // ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
  291 // this turns out not to be a problem in practice. See the
  292 // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
  293 // for more details:
   294 // https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
  295 //
   296 // The methods seek() and tell() allow rescanning from a previously visited
   297 // location of the buffer.
  298 //
  299 class MOZ_STACK_CLASS TokenStream
  300 {
   301     // Unicode separators that are treated as line terminators, in addition to \n, \r.
   302     enum {
   303         LINE_SEPARATOR = 0x2028,
   304         PARA_SEPARATOR = 0x2029
   305     };
   306 
   307     static const size_t ntokens = 4;                // 1 current + 2 lookahead, rounded
   308                                                     // to power of 2 to avoid divmod by 3
           // Upper bound on |lookahead|; ungetToken() asserts it is not exceeded.
   309     static const unsigned maxLookahead = 2;
           // |cursor| is wrapped with "& ntokensMask" instead of "% ntokens",
           // which is valid because ntokens is a power of two.
   310     static const unsigned ntokensMask = ntokens - 1;
  311 
  312   public:
   313     typedef Vector<char16_t, 32> CharBuffer;
           // CharBuffer is the type of |tokenbuf| (see getTokenbuf()) and of the
           // scratch buffer used by getRawTemplateStringAtom().
   314 
           // NOTE(review): presumably scans the |length| char16_t units starting
           // at |base|, and keeps |smg| as the |strictModeGetter| that
           // strictMode() null-checks -- confirm against the definition.
   315     TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
   316                 const char16_t* base, size_t length, StrictModeGetter* smg);
   317 
   318     ~TokenStream();
   319 
           // Declared only; MOZ_MUST_USE requires callers to check the result.
   320     MOZ_MUST_USE bool checkOptions();
  321 
   322     // Accessors.
           // currentToken() is the slot of the circular |tokens| buffer selected
           // by |cursor|.
   323     const Token& currentToken() const { return tokens[cursor]; }
   324     bool isCurrentTokenType(TokenKind type) const {
   325         return currentToken().type == type;
   326     }
   327     const CharBuffer& getTokenbuf() const { return tokenbuf; }
   328     const char* getFilename() const { return filename; }
   329     bool getMutedErrors() const { return mutedErrors; }
           // Both version accessors read options().version; versionNumber()
           // additionally passes it through VersionNumber().
   330     JSVersion versionNumber() const { return VersionNumber(options().version); }
   331     JSVersion versionWithFlags() const { return options().version; }
  332 
   333     PropertyName* currentName() const {
               // Name of the current token.  Token::name() asserts TOK_NAME, so a
               // TOK_YIELD token is answered with the "yield" name from
               // cx->names() instead.
   334         if (isCurrentTokenType(TOK_YIELD))
   335             return cx->names().yield;
   336         MOZ_ASSERT(isCurrentTokenType(TOK_NAME));
   337         return currentToken().name();
   338     }
  339 
   340     PropertyName* nextName() const {
               // Same as currentName(), but for the token returned by
               // nextToken(): TOK_YIELD maps to the "yield" name, anything else
               // must be TOK_NAME (asserted).
   341         if (nextToken().type == TOK_YIELD)
   342             return cx->names().yield;
   343         MOZ_ASSERT(nextToken().type == TOK_NAME);
   344         return nextToken().name();
   345     }
  346 
   347     bool nextNameContainsEscape() const {
               // A TOK_YIELD next token is always reported as escape-free;
               // otherwise the answer comes from Token::nameContainsEscape() on
               // the next token, which must be TOK_NAME (asserted).
   348         if (nextToken().type == TOK_YIELD)
   349             return false;
   350         MOZ_ASSERT(nextToken().type == TOK_NAME);
   351         return nextToken().nameContainsEscape();
   352     }
  353 
   354     bool isCurrentTokenAssignment() const {
               // Delegates the classification to TokenKindIsAssignment().
   355         return TokenKindIsAssignment(currentToken().type);
   356     }
  357 
   358     // Flag methods.
           // Read-only views of the |flags| bits; sawOctalEscape is the only bit
           // that can be reset from outside, via clearSawOctalEscape().
   359     bool isEOF() const { return flags.isEOF; }
   360     bool sawOctalEscape() const { return flags.sawOctalEscape; }
   361     bool hadError() const { return flags.hadError; }
   362     void clearSawOctalEscape() { flags.sawOctalEscape = false; }
  363 
   364     // TokenStream-specific error reporters.
           // All three take an error number followed by variadic arguments and
           // are only declared here.
   365     bool reportError(unsigned errorNumber, ...);
   366     bool reportErrorNoOffset(unsigned errorNumber, ...);
   367     bool reportWarning(unsigned errorNumber, ...);
   368 
           // NOTE(review): presumably the |offset| value meaning "no source
           // offset available" -- confirm against the reporters' definitions.
   369     static const uint32_t NoOffset = UINT32_MAX;
   370 
   371     // General-purpose error reporters.  You should avoid calling these
   372     // directly, and instead use the more succinct alternatives (e.g.
   373     // reportError()) in TokenStream, Parser, and BytecodeEmitter.
   374     bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
   375                                     va_list args);
   376     bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
   377                                        va_list args);
   378     bool reportExtraWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args);
   379 
   380     // asm.js reporter
   381     void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
  382 
   383     JSAtom* getRawTemplateStringAtom() {
               // Atomize the raw source text of the current template token with
               // its delimiters stripped.  "\r" and "\r\n" are each stored as a
               // single "\n".  Returns nullptr when appending to the scratch
               // buffer fails; otherwise returns whatever AtomizeChars() yields.
   384         MOZ_ASSERT(currentToken().type == TOK_TEMPLATE_HEAD ||
   385                    currentToken().type == TOK_NO_SUBS_TEMPLATE);
               // Skip the one-character opening delimiter.
   386         const char16_t* cur = userbuf.rawCharPtrAt(currentToken().pos.begin + 1);
   387         const char16_t* end;
   388         if (currentToken().type == TOK_TEMPLATE_HEAD) {
   389             // Of the form    |`...${|   or   |}...${|
   390             end = userbuf.rawCharPtrAt(currentToken().pos.end - 2);
   391         } else {
   392             // NO_SUBS_TEMPLATE is of the form   |`...`|   or   |}...`|
   393             end = userbuf.rawCharPtrAt(currentToken().pos.end - 1);
   394         }
   395 
   396         CharBuffer charbuf(cx);
   397         while (cur < end) {
   398             int32_t ch = *cur;
   399             if (ch == '\r') {
   400                 ch = '\n';
                       // Swallow the '\n' of a "\r\n" pair so only one newline
                       // is appended.
   401                 if ((cur + 1 < end) && (*(cur + 1) == '\n'))
   402                     cur++;
   403             }
   404             if (!charbuf.append(ch))
   405                 return nullptr;
   406             cur++;
   407         }
   408         return AtomizeChars(cx, charbuf.begin(), charbuf.length());
   409     }
  410 
  411   private:
   412     // These are private because they should only be called by the tokenizer
   413     // while tokenizing not by, for example, BytecodeEmitter.
   414     bool reportStrictModeError(unsigned errorNumber, ...);
           // False when no StrictModeGetter was supplied; otherwise whatever the
           // getter reports.
   415     bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
   416 
           // Declared only.  NOTE(review): presumably atomize() interns the
           // contents of |cb| -- confirm against the definition.
   417     static JSAtom* atomize(ExclusiveContext* cx, CharBuffer& cb);
   418     MOZ_MUST_USE bool putIdentInTokenbuf(const char16_t* identStart);
  419 
   420     struct Flags
           // Scanner status bits, packed as one-bit bitfields.
   421     {
   422         bool isEOF:1;           // Hit end of file.
   423         bool isDirtyLine:1;     // Non-whitespace since start of line.
   424         bool sawOctalEscape:1;  // Saw an octal character escape.
   425         bool hadError:1;        // Hit a syntax error, at start or during a
   426                                 // token.
   427         bool hitOOM:1;          // Hit OOM.
   428 
               // Value-initialize every bit, i.e. all flags start out false.
   429         Flags()
   430           : isEOF(), isDirtyLine(), sawOctalEscape(), hadError(), hitOOM()
   431         {}
   432     };
  433 
   434     bool awaitIsKeyword = false;  // private; AutoAwaitIsKeyword is the only friend with access
   435     friend class AutoAwaitIsKeyword;
  436 
  437   public:
   438     typedef Token::Modifier Modifier;
           // Re-export Token's private enums and their enumerators so callers
           // can write TokenStream::Operand etc.
   439     static constexpr Modifier None = Token::None;
   440     static constexpr Modifier Operand = Token::Operand;
   441     static constexpr Modifier KeywordIsName = Token::KeywordIsName;
   442     static constexpr Modifier TemplateTail = Token::TemplateTail;
   443 
   444     typedef Token::ModifierException ModifierException;
   445     static constexpr ModifierException NoException = Token::NoException;
   446     static constexpr ModifierException NoneIsOperand = Token::NoneIsOperand;
   447     static constexpr ModifierException OperandIsNone = Token::OperandIsNone;
   448     static constexpr ModifierException NoneIsKeywordIsName = Token::NoneIsKeywordIsName;
  449 
   450     void addModifierException(ModifierException modifierException) {
               // DEBUG-only bookkeeping (the body is empty otherwise): record on
               // the next token that it may legitimately be requested with a
               // modifier other than the one it was scanned with.  The asserts
               // check that the requested exception matches how the token was
               // actually scanned.
   451 #ifdef DEBUG
   452         const Token& next = nextToken();
   453         if (next.modifierException == NoneIsOperand)
   454         {
   455             // Token after yield expression without operand already has
   456             // NoneIsOperand exception.
   457             MOZ_ASSERT(modifierException == OperandIsNone);
   458             MOZ_ASSERT(next.type != TOK_DIV,
   459                        "next token requires contextual specifier to be parsed unambiguously");
   460 
   461             // Do not update modifierException.
   462             return;
   463         }
   464 
               // At most one exception may be recorded per token.
   465         MOZ_ASSERT(next.modifierException == NoException);
   466         switch (modifierException) {
   467           case NoneIsOperand:
   468             MOZ_ASSERT(next.modifier == Operand);
   469             MOZ_ASSERT(next.type != TOK_DIV,
   470                        "next token requires contextual specifier to be parsed unambiguously");
   471             break;
   472           case OperandIsNone:
   473             MOZ_ASSERT(next.modifier == None);
   474             MOZ_ASSERT(next.type != TOK_DIV && next.type != TOK_REGEXP,
   475                        "next token requires contextual specifier to be parsed unambiguously");
   476             break;
   477           case NoneIsKeywordIsName:
   478             MOZ_ASSERT(next.modifier == KeywordIsName);
   479             MOZ_ASSERT(next.type != TOK_NAME);
   480             break;
   481           default:
   482             MOZ_CRASH("unexpected modifier exception");
   483         }
               // Write to the slot after |cursor|, wrapping around the circular
               // buffer.
   484         tokens[(cursor + 1) & ntokensMask].modifierException = modifierException;
   485 #endif
   486     }
  487 
   488     void
   489     verifyConsistentModifier(Modifier modifier, Token lookaheadToken) {
               // DEBUG-only check (the body is empty otherwise) that a buffered
               // token is being requested with the modifier it was scanned with,
               // or with a mismatch whitelisted by its modifierException.  Any
               // other combination trips the assertion at the end.
               // NOTE(review): |lookaheadToken| is taken by value, so each call
               // copies a Token.
   490 #ifdef DEBUG
   491         // Easy case: modifiers match.
   492         if (modifier == lookaheadToken.modifier)
   493             return;
   494 
   495         if (lookaheadToken.modifierException == OperandIsNone) {
   496             // getToken(Operand) permissibly following getToken().
   497             if (modifier == Operand && lookaheadToken.modifier == None)
   498                 return;
   499         }
   500 
   501         if (lookaheadToken.modifierException == NoneIsOperand) {
   502             // getToken() permissibly following getToken(Operand).
   503             if (modifier == None && lookaheadToken.modifier == Operand)
   504                 return;
   505         }
   506 
   507         if (lookaheadToken.modifierException == NoneIsKeywordIsName) {
   508             // getToken() permissibly following getToken(KeywordIsName).
   509             if (modifier == None && lookaheadToken.modifier == KeywordIsName)
   510                 return;
   511         }
   512 
   513         MOZ_ASSERT_UNREACHABLE("this token was previously looked up with a "
   514                                "different modifier, potentially making "
   515                                "tokenization non-deterministic");
   516 #endif
   517     }
  518 
   519     // Advance to the next token.  If the token stream encountered an error,
   520     // return false.  Otherwise return true and store the token kind in |*ttp|.
           //
           // With pushed-back tokens available the next buffered token is simply
           // re-delivered, and |modifier| is only cross-checked (DEBUG builds)
           // against the modifier it was scanned with.  Otherwise a new token is
           // scanned by getTokenInternal().
   521     MOZ_MUST_USE bool getToken(TokenKind* ttp, Modifier modifier = None) {
   522         // Check for a pushed-back token resulting from mismatching lookahead.
   523         if (lookahead != 0) {
   524             MOZ_ASSERT(!flags.hadError);
   525             lookahead--;
                   // Step forward in the circular buffer.
   526             cursor = (cursor + 1) & ntokensMask;
   527             TokenKind tt = currentToken().type;
   528             MOZ_ASSERT(tt != TOK_EOL);
   529             verifyConsistentModifier(modifier, currentToken());
   530             *ttp = tt;
   531             return true;
   532         }
   533 
   534         return getTokenInternal(ttp, modifier);
   535     }
  536 
   537     // Push the last scanned token back into the stream.
           // Asserts that fewer than maxLookahead tokens are already pushed back.
   538     void ungetToken() {
   539         MOZ_ASSERT(lookahead < maxLookahead);
   540         lookahead++;
               // Step back in the circular buffer; the mask makes the unsigned
               // "cursor - 1" wrap to the last slot when cursor is 0.
   541         cursor = (cursor - 1) & ntokensMask;
   542     }
  543 
   544     MOZ_MUST_USE bool peekToken(TokenKind* ttp, Modifier modifier = None) {
               // Store the kind of the next token in |*ttp| without consuming it.
               // If a token is already buffered it is reported directly and
               // |modifier| is only cross-checked (DEBUG builds).  Otherwise one
               // token is scanned and pushed straight back.  Returns false when
               // scanning fails.
   545         if (lookahead > 0) {
   546             MOZ_ASSERT(!flags.hadError);
   547             verifyConsistentModifier(modifier, nextToken());
   548             *ttp = nextToken().type;
   549             return true;
   550         }
   551         if (!getTokenInternal(ttp, modifier))
   552             return false;
   553         ungetToken();
   554         return true;
   555     }
  556 
   557     MOZ_MUST_USE bool peekTokenPos(TokenPos* posp, Modifier modifier = None) {
               // Like peekToken(), but stores the next token's source range in
               // |*posp| instead of its kind.  Returns false when scanning fails,
               // in which case |*posp| is not written.
   558         if (lookahead == 0) {
   559             TokenKind tt;
   560             if (!getTokenInternal(&tt, modifier))
   561                 return false;
   562             ungetToken();
   563             MOZ_ASSERT(hasLookahead());
   564         } else {
   565             MOZ_ASSERT(!flags.hadError);
   566             verifyConsistentModifier(modifier, nextToken());
   567         }
   568         *posp = nextToken().pos;
   569         return true;
   570     }
  571 
   572     MOZ_MUST_USE bool peekOffset(uint32_t* offset, Modifier modifier = None) {
               // Store the next token's begin offset in |*offset|, via
               // peekTokenPos().  On failure |*offset| is left untouched.
   573         TokenPos pos;
   574         if (!peekTokenPos(&pos, modifier))
   575             return false;
   576         *offset = pos.begin;
   577         return true;
   578     }
  579 
   580     // This is like peekToken(), with one exception:  if there is an EOL
   581     // between the end of the current token and the start of the next token, it
   582     // returns true and stores TOK_EOL in |*ttp|.  In that case, no token with
   583     // TOK_EOL is actually created, just a TOK_EOL TokenKind is returned, and
   584     // currentToken() shouldn't be consulted.  (This is the only place TOK_EOL
   585     // is produced.)
   586     MOZ_ALWAYS_INLINE MOZ_MUST_USE bool
   587     peekTokenSameLine(TokenKind* ttp, Modifier modifier = None) {
   588         const Token& curr = currentToken();
   589 
   590         // If lookahead != 0, we have scanned ahead at least one token, and
   591         // |lineno| is the line that the furthest-scanned token ends on.  If
   592         // it's the same as the line that the current token ends on, that's a
   593         // stronger condition than what we are looking for, and we don't need
   594         // to return TOK_EOL.
   595         if (lookahead != 0) {
   596             bool onThisLine;
                   // A failing isOnThisLine() is reported as out-of-memory; the
                   // function then returns whatever reportError() returns.
   597             if (!srcCoords.isOnThisLine(curr.pos.end, lineno, &onThisLine))
   598                 return reportError(JSMSG_OUT_OF_MEMORY);
   599             if (onThisLine) {
   600                 MOZ_ASSERT(!flags.hadError);
   601                 verifyConsistentModifier(modifier, nextToken());
   602                 *ttp = nextToken().type;
   603                 return true;
   604             }
   605         }
   606 
   607         // The above check misses two cases where we don't have to return
   608         // TOK_EOL.
   609         // - The next token starts on the same line, but is a multi-line token.
   610         // - The next token starts on the same line, but lookahead==2 and there
   611         //   is a newline between the next token and the one after that.
   612         // The following test is somewhat expensive but gets these cases (and
   613         // all others) right.
   614         TokenKind tmp;
   615         if (!getToken(&tmp, modifier))
   616             return false;
               // |curr| and |next| refer to slots of |tokens|, so both stay
               // usable after ungetToken() moves |cursor| back.
   617         const Token& next = currentToken();
   618         ungetToken();
   619 
   620         *ttp = srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
   621              ? next.type
   622              : TOK_EOL;
   623         return true;
   624     }
  625 
   626     // Get the next token from the stream if its kind is |tt|.
           // |*matchedp| says whether the token was consumed; on a mismatch the
           // token is pushed back.  Returns false only when getToken() fails, in
           // which case |*matchedp| is not written.
   627     MOZ_MUST_USE bool matchToken(bool* matchedp, TokenKind tt, Modifier modifier = None) {
   628         TokenKind token;
   629         if (!getToken(&token, modifier))
   630             return false;
   631         if (token == tt) {
   632             *matchedp = true;
   633         } else {
   634             ungetToken();
   635             *matchedp = false;
   636         }
   637         return true;
   638     }
  639 
   640     void consumeKnownToken(TokenKind tt, Modifier modifier = None) {
               // Consume a token the caller has already peeked at and knows to be
               // of kind |tt|.  Both the presence of lookahead and the match
               // itself are asserted; nothing is reported back to the caller.
   641         bool matched;
   642         MOZ_ASSERT(hasLookahead());
   643         MOZ_ALWAYS_TRUE(matchToken(&matched, tt, modifier));
   644         MOZ_ALWAYS_TRUE(matched);
   645     }
  646 
   647     // Like matchToken(..., TOK_NAME) but further matching the name token only
   648     // if it has the given characters, without containing escape sequences.
   649     // If the name token has the given characters yet *does* contain an escape,
   650     // a syntax error will be reported.
   651     //
   652     // This latter behavior makes this method unsuitable for use in any context
   653     // where ASI might occur.  In such places, an escaped "contextual keyword"
   654     // on a new line is the start of an ExpressionStatement, not a continuation
   655     // of a StatementListItem (or ImportDeclaration or ExportDeclaration, in
   656     // modules).
           //
           // On either failure path (scanning error or escaped keyword) the
           // function returns false without writing |*matchedp|.
   657     MOZ_MUST_USE bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword,
   658                                              Modifier modifier = None)
   659     {
   660         TokenKind token;
   661         if (!getToken(&token, modifier))
   662             return false;
   663         if (token == TOK_NAME && currentToken().name() == keyword) {
   664             if (currentToken().nameContainsEscape()) {
   665                 reportError(JSMSG_ESCAPED_KEYWORD);
   666                 return false;
   667             }
   668 
   669             *matchedp = true;
   670         } else {
   671             *matchedp = false;
   672             ungetToken();
   673         }
   674         return true;
   675     }
  676 
   677     MOZ_MUST_USE bool nextTokenEndsExpr(bool* endsExpr) {
               // Peek at the next token (default modifier) and look its kind up
               // in the |isExprEnding| table.  Returns false when peeking fails,
               // leaving |*endsExpr| unwritten.
   678         TokenKind tt;
   679         if (!peekToken(&tt))
   680             return false;
   681         *endsExpr = isExprEnding[tt];
   682         return true;
   683     }
  684 
   685     class MOZ_STACK_CLASS Position {
           // Opaque snapshot of scanner state.  All data members are private
           // and TokenStream is a friend; copying is deleted.
           // NOTE(review): presumably filled in by tell() and consumed by
           // seek() -- their definitions are not visible here.
   686       public:
   687         // The Token fields may contain pointers to atoms, so for correct
   688         // rooting we must ensure collection of atoms is disabled while objects
   689         // of this class are live.  Do this by requiring a dummy AutoKeepAtoms
   690         // reference in the constructor.
   691         //
   692         // This class is explicitly ignored by the analysis, so don't add any
   693         // more pointers to GC things here!
   694         explicit Position(AutoKeepAtoms&) { }
   695       private:
   696         Position(const Position&) = delete;
   697         friend class TokenStream;
   698         const char16_t* buf;
   699         Flags flags;
   700         unsigned lineno;
   701         size_t linebase;
   702         size_t prevLinebase;
   703         Token currentToken;
   704         unsigned lookahead;
               // Sized by maxLookahead, the most tokens that can be pushed back.
   705         Token lookaheadTokens[maxLookahead];
   706     };
  707 
           // Repositioning API; only declarations are visible here.  The class
           // comment describes seek() and tell() as the way to rescan from a
           // previously visited location.
   708     MOZ_MUST_USE bool advance(size_t position);
   709     void tell(Position*);
   710     void seek(const Position& pos);
           // NOTE(review): presumably seeks to a Position recorded on |other|;
           // this overload can fail, the single-argument one cannot.
   711     MOZ_MUST_USE bool seek(const Position& pos, const TokenStream& other);
   712 #ifdef DEBUG
           // DEBUG-only: true when no token is currently pushed back.
   713     inline bool debugHasNoLookahead() const {
   714         return lookahead == 0;
   715     }
   716 #endif
  717 
   718     const char16_t* rawCharPtrAt(size_t offset) const {
               // Thin forwarder to |userbuf|.
   719         return userbuf.rawCharPtrAt(offset);
   720     }
   721 
   722     const char16_t* rawLimit() const {
               // Thin forwarder to userbuf.limit().
   723         return userbuf.limit();
   724     }
  725 
  726     bool hasDisplayURL() const {
  727         return displayURL_ != nullptr;
  728     }
  729 
          // Raw pointer to the display URL characters, or null when none is set.
          // The pointer is obtained with get(), so |displayURL_| keeps ownership
          // of the buffer.
  730     char16_t* displayURL() {
  731         return displayURL_.get();
  732     }
  733 
  734     bool hasSourceMapURL() const {
  735         return sourceMapURL_ != nullptr;
  736     }
  737 
          // Raw pointer to the source map URL characters, or null when none is
          // set.  The pointer is obtained with get(), so |sourceMapURL_| keeps
          // ownership of the buffer.
  738     char16_t* sourceMapURL() {
  739         return sourceMapURL_.get();
  740     }
  741 
  742     // If |atom| is not a keyword in this version, return true with *ttp
  743     // unchanged.
  744     //
  745     // If it is a reserved word in this version and strictness mode, and thus
  746     // can't be present in correct code, report a SyntaxError and return false.
  747     //
  748     // If it is a keyword, like "if", return true with the keyword's TokenKind
  749     // in *ttp.
          //
          // Because the "not a keyword" case leaves *ttp untouched, a caller that
          // reads *ttp afterwards needs to have initialized it beforehand.
  750     MOZ_MUST_USE bool checkForKeyword(JSAtom* atom, TokenKind* ttp);
  751
  752     // Same semantics as above, but for the keyword described by |kw| rather
          // than for an atom.
  753     MOZ_MUST_USE bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp);
  754 
  755     // This class maps a userbuf offset (which is 0-indexed) to a line number
  756     // (which is 1-indexed) and a column index (which is 0-indexed).
  757     class SourceCoords
  758     {
  759         // For a given buffer holding source code, |lineStartOffsets_| has one
  760         // element per line of source code, plus one sentinel element.  Each
  761         // non-sentinel element holds the buffer offset for the start of the
  762         // corresponding line of source code.  For this example script:
  763         //
  764         // 1  // xyz            [line starts at offset 0]
  765         // 2  var x;            [line starts at offset 7]
  766         // 3                    [line starts at offset 14]
  767         // 4  var y;            [line starts at offset 15]
  768         //
  769         // |lineStartOffsets_| is:
  770         //
  771         //   [0, 7, 14, 15, MAX_PTR]
  772         //
  773         // To convert a "line number" to a "line index" (i.e. an index into
  774         // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
  775         // line index is (3 - initialLineNum_), which is 2.  Therefore
  776         // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
  777         // which is 14.  (Note that |initialLineNum_| is often 1, but not
  778         // always.)
  779         //
  780         // The first element is always 0, and the last element is always the
  781         // MAX_PTR sentinel.
  782         //
  783         // offset-to-line/column lookups are O(log n) in the worst case (binary
  784         // search), but in practice they're heavily clustered and we do better
  785         // than that by using the previous lookup's result (lastLineIndex_) as
  786         // a starting point.
  787         //
  788         // Checking if an offset lies within a particular line number
  789         // (isOnThisLine()) is O(1).
  790         //
  791         Vector<uint32_t, 128> lineStartOffsets_;
  792         uint32_t            initialLineNum_;
  793 
  794         // This is mutable because it's modified on every search, but that fact
  795         // isn't visible outside this class.
  796         mutable uint32_t    lastLineIndex_;
  797 
  798         uint32_t lineIndexOf(uint32_t offset) const;
  799 
  800         static const uint32_t MAX_PTR = UINT32_MAX;
  801 
  802         uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
  803         uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }
  804 
  805       public:
  806         SourceCoords(ExclusiveContext* cx, uint32_t ln);
  807 
  808         MOZ_MUST_USE bool add(uint32_t lineNum, uint32_t lineStartOffset);
  809         MOZ_MUST_USE bool fill(const SourceCoords& other);
  810 
  811         bool isOnThisLine(uint32_t offset, uint32_t lineNum, bool* onThisLine) const {
  812             uint32_t lineIndex = lineNumToIndex(lineNum);
  813             if (lineIndex + 1 >= lineStartOffsets_.length()) // +1 due to sentinel
  814                 return false;
  815             *onThisLine = lineStartOffsets_[lineIndex] <= offset &&
  816                           offset < lineStartOffsets_[lineIndex + 1];
  817             return true;
  818         }
  819 
  820         uint32_t lineNum(uint32_t offset) const;
  821         uint32_t columnIndex(uint32_t offset) const;
  822         void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const;
  823     };
  824 
          // This stream's SourceCoords instance (the offset-to-line/column mapping
          // described on the class above).
  825     SourceCoords srcCoords;
  826 
          // Shorthand for cx->names().
  827     JSAtomState& names() const {
  828         return cx->names();
  829     }
  830 
          // Returns the |cx| pointer held by this stream.
  831     ExclusiveContext* context() const {
  832         return cx;
  833     }
  834 
          // Returns the compile options reference stored in |options_|.
  835     const ReadOnlyCompileOptions& options() const {
  836         return options_;
  837     }
  838 
  839   private:
  840     // This is the low-level interface to the JS source code buffer.  It just
  841     // gets raw chars, basically.  TokenStreams functions are layered on top
  842     // and do some extra stuff like converting all EOL sequences to '\n',
  843     // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
  844     // chars" refers to the lack of EOL sequence normalization.)
  845     //
  846     // buf[0..length-1] often represents a substring of some larger source,
  847     // where we have only the substring in memory. The |startOffset| argument
  848     // indicates the offset within this larger string at which our string
  849     // begins, the offset of |buf[0]|.
  850     class TokenBuf {
  851       public:
  852         TokenBuf(ExclusiveContext* cx, const char16_t* buf, size_t length, size_t startOffset)
  853           : base_(buf),
  854             startOffset_(startOffset),
  855             limit_(buf + length),
  856             ptr(buf)
  857         { }
  858 
  859         bool hasRawChars() const {
  860             return ptr < limit_;
  861         }
  862 
  863         bool atStart() const {
  864             return offset() == 0;
  865         }
  866 
  867         size_t startOffset() const {
  868             return startOffset_;
  869         }
  870 
  871         size_t offset() const {
  872             return startOffset_ + mozilla::PointerRangeSize(base_, ptr);
  873         }
  874 
  875         const char16_t* rawCharPtrAt(size_t offset) const {
  876             MOZ_ASSERT(startOffset_ <= offset);
  877             MOZ_ASSERT(offset - startOffset_ <= mozilla::PointerRangeSize(base_, limit_));
  878             return base_ + (offset - startOffset_);
  879         }
  880 
  881         const char16_t* limit() const {
  882             return limit_;
  883         }
  884 
  885         char16_t getRawChar() {
  886             return *ptr++;      // this will nullptr-crash if poisoned
  887         }
  888 
  889         char16_t peekRawChar() const {
  890             return *ptr;        // this will nullptr-crash if poisoned
  891         }
  892 
  893         bool matchRawChar(char16_t c) {
  894             if (*ptr == c) {    // this will nullptr-crash if poisoned
  895                 ptr++;
  896                 return true;
  897             }
  898             return false;
  899         }
  900 
  901         bool matchRawCharBackwards(char16_t c) {
  902             MOZ_ASSERT(ptr);     // make sure it hasn't been poisoned
  903             if (*(ptr - 1) == c) {
  904                 ptr--;
  905                 return true;
  906             }
  907             return false;
  908         }
  909 
  910         void ungetRawChar() {
  911             MOZ_ASSERT(ptr);     // make sure it hasn't been poisoned
  912             ptr--;
  913         }
  914 
  915         const char16_t* addressOfNextRawChar(bool allowPoisoned = false) const {
  916             MOZ_ASSERT_IF(!allowPoisoned, ptr);     // make sure it hasn't been poisoned
  917             return ptr;
  918         }
  919 
  920         // Use this with caution!
  921         void setAddressOfNextRawChar(const char16_t* a, bool allowPoisoned = false) {
  922             MOZ_ASSERT_IF(!allowPoisoned, a);
  923             ptr = a;
  924         }
  925 
  926 #ifdef DEBUG
  927         // Poison the TokenBuf so it cannot be accessed again.
  928         void poison() {
  929             ptr = nullptr;
  930         }
  931 #endif
  932 
  933         static bool isRawEOLChar(int32_t c) {
  934             return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
  935         }
  936 
  937         // Returns the offset of the next EOL, but stops once 'max' characters
  938         // have been scanned (*including* the char at startOffset_).
  939         size_t findEOLMax(size_t start, size_t max);
  940 
  941       private:
  942         const char16_t* base_;          // base of buffer
  943         uint32_t startOffset_;          // offset of base_[0]
  944         const char16_t* limit_;         // limit for quick bounds check
  945         const char16_t* ptr;            // next char to get
  946     };
  947 
          // Private scanning helpers.  Everything in this group is only declared
          // here; the bodies are not part of this header section.  The
          // MOZ_MUST_USE functions report failure through their bool result.
  948     MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier);
  949
  950     MOZ_MUST_USE bool getBracedUnicode(uint32_t* code);
  951     MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp);
  952
          // Character-level readers and matchers.  The comment on TokenBuf
          // describes the TokenStream-level functions as the layer that normalizes
          // EOL sequences and tracks the line number.
          // NOTE(review): presumably the *IgnoreEOL variants skip that
          // bookkeeping -- confirm against the definitions.
  953     int32_t getChar();
  954     int32_t getCharIgnoreEOL();
  955     void ungetChar(int32_t c);
  956     void ungetCharIgnoreEOL(int32_t c);
  957     Token* newToken(ptrdiff_t adjust);
  958     uint32_t peekUnicodeEscape(uint32_t* codePoint);
  959     uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
  960     uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
  961     bool matchUnicodeEscapeIdent(uint32_t* codePoint);
  962     bool matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint);
  963     bool peekChars(int n, char16_t* cp);
  964
          // Comment-directive handling.
          // NOTE(review): getDisplayURL() and getSourceMappingURL() presumably
          // store into |displayURL_| and |sourceMapURL_| through getDirective()'s
          // |destination| parameter -- confirm against the definitions.
  965     MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
  966     MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
  967                                    const char* directive, int directiveLength,
  968                                    const char* errorMsgPragma,
  969                                    UniquePtr<char16_t[], JS::FreePolicy>* destination);
  970     MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
  971     MOZ_MUST_USE bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);
  972 
  973     // |expect| cannot be an EOL char.
  974     bool matchChar(int32_t expect) {
  975         MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
  976         return MOZ_LIKELY(userbuf.hasRawChars()) &&
  977                userbuf.matchRawChar(expect);
  978     }
  979 
  980     void consumeKnownChar(int32_t expect) {
  981         mozilla::DebugOnly<int32_t> c = getChar();
  982         MOZ_ASSERT(c == expect);
  983     }
  984 
  985     int32_t peekChar() {
  986         int32_t c = getChar();
  987         ungetChar(c);
  988         return c;
  989     }
  990 
  991     void skipChars(int n) {
  992         while (--n >= 0)
  993             getChar();
  994     }
  995 
  996     void skipCharsIgnoreEOL(int n) {
  997         while (--n >= 0)
  998             getCharIgnoreEOL();
  999     }
 1000 
          // End-of-line bookkeeping helpers (declarations only).
          // NOTE(review): presumably called by the char readers once a line
          // terminator has been consumed -- confirm against the definitions.
 1001     void updateLineInfoForEOL();
 1002     void updateFlagsForEOL();
 1003 
 1004     const Token& nextToken() const {
 1005         MOZ_ASSERT(hasLookahead());
 1006         return tokens[(cursor + 1) & ntokensMask];
 1007     }
 1008 
 1009     bool hasLookahead() const { return lookahead > 0; }
 1010 
 1011     // Options used for parsing/tokenizing.
 1012     const ReadOnlyCompileOptions& options_;
 1013
          // |tokens| is indexed modulo its size through |ntokensMask|; see
          // nextToken(), which reads the slot after |cursor|.
 1014     Token               tokens[ntokens];    // circular token buffer
 1015     unsigned            cursor;             // index of last parsed token
 1016     unsigned            lookahead;          // count of lookahead tokens
 1017     unsigned            lineno;             // current line number
 1018     Flags               flags;              // flags -- see above
 1019     size_t              linebase;           // start of current line
 1020     size_t              prevLinebase;       // start of previous line;  size_t(-1) if on the first line
 1021     TokenBuf            userbuf;            // user input buffer
 1022     const char*         filename;           // input filename or null
 1023     UniqueTwoByteChars  displayURL_;        // the user's requested source URL or null
 1024     UniqueTwoByteChars  sourceMapURL_;      // source map's filename or null
 1025     CharBuffer          tokenbuf;           // current token string buffer
          // Indexed by TokenKind; read by nextTokenEndsExpr().
 1026     uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
          // Context pointer returned by context() and used by names().
 1027     ExclusiveContext*   const cx;
          // NOTE(review): presumably controls whether error details for this
          // source are muted -- not shown in this header section, confirm.
 1028     bool                mutedErrors;
 1029     StrictModeGetter*   strictModeGetter;  // used to test for strict mode
 1030 };
 1031 
 1032 class MOZ_STACK_CLASS AutoAwaitIsKeyword
 1033 {
 1034 private:
 1035     TokenStream* ts_;
 1036     bool oldAwaitIsKeyword_;
 1037 
 1038 public:
 1039     AutoAwaitIsKeyword(TokenStream* ts, bool awaitIsKeyword) {
 1040         ts_ = ts;
 1041         oldAwaitIsKeyword_ = ts_->awaitIsKeyword;
 1042         ts_->awaitIsKeyword = awaitIsKeyword;
 1043     }
 1044 
 1045     ~AutoAwaitIsKeyword() {
 1046         ts_->awaitIsKeyword = oldAwaitIsKeyword_;
 1047         ts_ = nullptr;
 1048     }
 1049 };
 1050 
      // Declared here, defined elsewhere.
      // NOTE(review): presumably returns a human-readable description of |tt|
      // for use in diagnostics -- confirm against the definition.
 1051 extern const char*
 1052 TokenKindToDesc(TokenKind tt);
 1053 
 1054 } // namespace frontend
 1055 } // namespace js
 1056 
      // Declared here, defined elsewhere; exported through JS_FRIEND_API.
      // NOTE(review): the parameters mirror fgets(), but the result is an int
      // rather than a char* -- check the definition for what the value means.
 1057 extern JS_FRIEND_API(int)
 1058 js_fgets(char* buf, int size, FILE* file);
 1059 
      // Only declared in DEBUG builds; lives in the global namespace, hence the
      // fully qualified parameter type.
      // NOTE(review): presumably returns the name of |tt| for debugging output
      // -- confirm against the definition.
 1060 #ifdef DEBUG
 1061 extern const char*
 1062 TokenKindToString(js::frontend::TokenKind tt);
 1063 #endif
 1064 
 1065 #endif /* frontend_TokenStream_h */