"Fossies" - the Fresh Open Source Software Archive

Member "grav/vendor/twig/twig/src/Lexer.php" (1 Sep 2020, 20580 Bytes) of package /linux/www/grav-v1.6.27.zip:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "Lexer.php", see the Fossies "Dox" file reference documentation.

    1 <?php
    2 
    3 /*
    4  * This file is part of Twig.
    5  *
    6  * (c) Fabien Potencier
    7  * (c) Armin Ronacher
    8  *
    9  * For the full copyright and license information, please view the LICENSE
   10  * file that was distributed with this source code.
   11  */
   12 
   13 namespace Twig;
   14 
   15 use Twig\Error\SyntaxError;
   16 
   17 /**
   18  * Lexes a template string.
   19  *
   20  * @author Fabien Potencier <fabien@symfony.com>
   21  */
   22 class Lexer implements \Twig_LexerInterface
   23 {
    /** @var Token[] Tokens pushed so far by pushToken() for the template being lexed */
    protected $tokens;
    /** @var string Template code with "\r\n" and "\r" normalized to "\n" */
    protected $code;
    /** @var int Current offset in $code, advanced by strlen() of the consumed text */
    protected $cursor;
    /** @var int Current line number; starts at 1 and grows with every "\n" consumed */
    protected $lineno;
    /** @var int strlen() of $code; lexing stops once the cursor reaches it */
    protected $end;
    /** @var int Current lexer state, one of the STATE_* constants */
    protected $state;
    /** @var int[] Stack of the states that were active before each pushState() call */
    protected $states;
    /** @var array[] Stack of [opening delimiter, line number] pairs still waiting to be closed */
    protected $brackets;
    /** @var Environment Environment given to the constructor; queried for the operators */
    protected $env;
    // to be renamed to $name in 2.0 (where it is private)
    protected $filename;
    /** @var array Default options merged with the ones given to the constructor */
    protected $options;
    /** @var string[] Regular expressions built once in the constructor, keyed by purpose */
    protected $regexes;
    /** @var int Index in $positions[0] of the last tag start taken into account; -1 before the first one */
    protected $position;
    /** @var array Result of preg_match_all() for the "lex_tokens_start" regex, with offsets captured */
    protected $positions;
    /** @var int Line on which the current block or variable tag started; reported by "Unclosed" errors */
    protected $currentVarBlockLine;

    /** @var Source The source being tokenized */
    private $source;

    // Lexer states; tokenize() selects the lexing method from the current one.
    const STATE_DATA = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR = 2;
    const STATE_STRING = 3;
    const STATE_INTERPOLATION = 4;

    // A name: a letter, underscore or byte in \x7f-\xff, followed by any of those or digits.
    const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    // A number: digits, an optional fractional part, and an optional exponent whose sign is mandatory.
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
    // A complete string: double-quoted without any unescaped "#", or single-quoted.
    const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    // The delimiter that opens or closes a double-quoted string.
    const REGEX_DQ_STRING_DELIM = '/"/A';
    // A literal part of a double-quoted string; stops before "#{" or an unescaped double quote.
    const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
    // Single characters lexed as punctuation tokens.
    const PUNCTUATION = '()[]{}?:.,|';
   55 
   56     public function __construct(Environment $env, array $options = [])
   57     {
   58         $this->env = $env;
   59 
   60         $this->options = array_merge([
   61             'tag_comment' => ['{#', '#}'],
   62             'tag_block' => ['{%', '%}'],
   63             'tag_variable' => ['{{', '}}'],
   64             'whitespace_trim' => '-',
   65             'whitespace_line_trim' => '~',
   66             'whitespace_line_chars' => ' \t\0\x0B',
   67             'interpolation' => ['#{', '}'],
   68         ], $options);
   69 
   70         // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
   71         $this->regexes = [
   72             // }}
   73             'lex_var' => '{
   74                 \s*
   75                 (?:'.
   76                     preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
   77                     '|'.
   78                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
   79                     '|'.
   80                     preg_quote($this->options['tag_variable'][1], '#'). // }}
   81                 ')
   82             }Ax',
   83 
   84             // %}
   85             'lex_block' => '{
   86                 \s*
   87                 (?:'.
   88                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
   89                     '|'.
   90                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
   91                     '|'.
   92                     preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
   93                 ')
   94             }Ax',
   95 
   96             // {% endverbatim %}
   97             'lex_raw_data' => '{'.
   98                 preg_quote($this->options['tag_block'][0], '#'). // {%
   99                 '('.
  100                     $this->options['whitespace_trim']. // -
  101                     '|'.
  102                     $this->options['whitespace_line_trim']. // ~
  103                 ')?\s*'.
  104                 '(?:end%s)'. // endraw or endverbatim
  105                 '\s*'.
  106                 '(?:'.
  107                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
  108                     '|'.
  109                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  110                     '|'.
  111                     preg_quote($this->options['tag_block'][1], '#'). // %}
  112                 ')
  113             }sx',
  114 
  115             'operator' => $this->getOperatorRegex(),
  116 
  117             // #}
  118             'lex_comment' => '{
  119                 (?:'.
  120                     preg_quote($this->options['whitespace_trim']).preg_quote($this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
  121                     '|'.
  122                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
  123                     '|'.
  124                     preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
  125                 ')
  126             }sx',
  127 
  128             // verbatim %}
  129             'lex_block_raw' => '{
  130                 \s*
  131                 (raw|verbatim)
  132                 \s*
  133                 (?:'.
  134                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
  135                     '|'.
  136                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  137                     '|'.
  138                     preg_quote($this->options['tag_block'][1], '#'). // %}
  139                 ')
  140             }Asx',
  141 
  142             'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
  143 
  144             // {{ or {% or {#
  145             'lex_tokens_start' => '{
  146                 ('.
  147                     preg_quote($this->options['tag_variable'][0], '#'). // {{
  148                     '|'.
  149                     preg_quote($this->options['tag_block'][0], '#'). // {%
  150                     '|'.
  151                     preg_quote($this->options['tag_comment'][0], '#'). // {#
  152                 ')('.
  153                     preg_quote($this->options['whitespace_trim'], '#'). // -
  154                     '|'.
  155                     preg_quote($this->options['whitespace_line_trim'], '#'). // ~
  156                 ')?
  157             }sx',
  158             'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
  159             'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
  160         ];
  161     }
  162 
  163     public function tokenize($code, $name = null)
  164     {
  165         if (!$code instanceof Source) {
  166             @trigger_error(sprintf('Passing a string as the $code argument of %s() is deprecated since version 1.27 and will be removed in 2.0. Pass a \Twig\Source instance instead.', __METHOD__), E_USER_DEPRECATED);
  167             $this->source = new Source($code, $name);
  168         } else {
  169             $this->source = $code;
  170         }
  171 
  172         if (((int) ini_get('mbstring.func_overload')) & 2) {
  173             @trigger_error('Support for having "mbstring.func_overload" different from 0 is deprecated version 1.29 and will be removed in 2.0.', E_USER_DEPRECATED);
  174         }
  175 
  176         if (\function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
  177             $mbEncoding = mb_internal_encoding();
  178             mb_internal_encoding('ASCII');
  179         } else {
  180             $mbEncoding = null;
  181         }
  182 
  183         $this->code = str_replace(["\r\n", "\r"], "\n", $this->source->getCode());
  184         $this->filename = $this->source->getName();
  185         $this->cursor = 0;
  186         $this->lineno = 1;
  187         $this->end = \strlen($this->code);
  188         $this->tokens = [];
  189         $this->state = self::STATE_DATA;
  190         $this->states = [];
  191         $this->brackets = [];
  192         $this->position = -1;
  193 
  194         // find all token starts in one go
  195         preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
  196         $this->positions = $matches;
  197 
  198         while ($this->cursor < $this->end) {
  199             // dispatch to the lexing functions depending
  200             // on the current state
  201             switch ($this->state) {
  202                 case self::STATE_DATA:
  203                     $this->lexData();
  204                     break;
  205 
  206                 case self::STATE_BLOCK:
  207                     $this->lexBlock();
  208                     break;
  209 
  210                 case self::STATE_VAR:
  211                     $this->lexVar();
  212                     break;
  213 
  214                 case self::STATE_STRING:
  215                     $this->lexString();
  216                     break;
  217 
  218                 case self::STATE_INTERPOLATION:
  219                     $this->lexInterpolation();
  220                     break;
  221             }
  222         }
  223 
  224         $this->pushToken(Token::EOF_TYPE);
  225 
  226         if (!empty($this->brackets)) {
  227             list($expect, $lineno) = array_pop($this->brackets);
  228             throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  229         }
  230 
  231         if ($mbEncoding) {
  232             mb_internal_encoding($mbEncoding);
  233         }
  234 
  235         return new TokenStream($this->tokens, $this->source);
  236     }
  237 
  238     protected function lexData()
  239     {
  240         // if no matches are left we return the rest of the template as simple text token
  241         if ($this->position == \count($this->positions[0]) - 1) {
  242             $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
  243             $this->cursor = $this->end;
  244 
  245             return;
  246         }
  247 
  248         // Find the first token after the current cursor
  249         $position = $this->positions[0][++$this->position];
  250         while ($position[1] < $this->cursor) {
  251             if ($this->position == \count($this->positions[0]) - 1) {
  252                 return;
  253             }
  254             $position = $this->positions[0][++$this->position];
  255         }
  256 
  257         // push the template text first
  258         $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
  259 
  260         // trim?
  261         if (isset($this->positions[2][$this->position][0])) {
  262             if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
  263                 // whitespace_trim detected ({%-, {{- or {#-)
  264                 $text = rtrim($text);
  265             } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
  266                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  267                 // don't trim \r and \n
  268                 $text = rtrim($text, " \t\0\x0B");
  269             }
  270         }
  271         $this->pushToken(Token::TEXT_TYPE, $text);
  272         $this->moveCursor($textContent.$position[0]);
  273 
  274         switch ($this->positions[1][$this->position][0]) {
  275             case $this->options['tag_comment'][0]:
  276                 $this->lexComment();
  277                 break;
  278 
  279             case $this->options['tag_block'][0]:
  280                 // raw data?
  281                 if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
  282                     $this->moveCursor($match[0]);
  283                     $this->lexRawData($match[1]);
  284                 // {% line \d+ %}
  285                 } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
  286                     $this->moveCursor($match[0]);
  287                     $this->lineno = (int) $match[1];
  288                 } else {
  289                     $this->pushToken(Token::BLOCK_START_TYPE);
  290                     $this->pushState(self::STATE_BLOCK);
  291                     $this->currentVarBlockLine = $this->lineno;
  292                 }
  293                 break;
  294 
  295             case $this->options['tag_variable'][0]:
  296                 $this->pushToken(Token::VAR_START_TYPE);
  297                 $this->pushState(self::STATE_VAR);
  298                 $this->currentVarBlockLine = $this->lineno;
  299                 break;
  300         }
  301     }
  302 
  303     protected function lexBlock()
  304     {
  305         if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
  306             $this->pushToken(Token::BLOCK_END_TYPE);
  307             $this->moveCursor($match[0]);
  308             $this->popState();
  309         } else {
  310             $this->lexExpression();
  311         }
  312     }
  313 
  314     protected function lexVar()
  315     {
  316         if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
  317             $this->pushToken(Token::VAR_END_TYPE);
  318             $this->moveCursor($match[0]);
  319             $this->popState();
  320         } else {
  321             $this->lexExpression();
  322         }
  323     }
  324 
  325     protected function lexExpression()
  326     {
  327         // whitespace
  328         if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
  329             $this->moveCursor($match[0]);
  330 
  331             if ($this->cursor >= $this->end) {
  332                 throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
  333             }
  334         }
  335 
  336         // arrow function
  337         if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor + 1]) {
  338             $this->pushToken(Token::ARROW_TYPE, '=>');
  339             $this->moveCursor('=>');
  340         }
  341         // operators
  342         elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
  343             $this->pushToken(Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
  344             $this->moveCursor($match[0]);
  345         }
  346         // names
  347         elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
  348             $this->pushToken(Token::NAME_TYPE, $match[0]);
  349             $this->moveCursor($match[0]);
  350         }
  351         // numbers
  352         elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
  353             $number = (float) $match[0];  // floats
  354             if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
  355                 $number = (int) $match[0]; // integers lower than the maximum
  356             }
  357             $this->pushToken(Token::NUMBER_TYPE, $number);
  358             $this->moveCursor($match[0]);
  359         }
  360         // punctuation
  361         elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
  362             // opening bracket
  363             if (false !== strpos('([{', $this->code[$this->cursor])) {
  364                 $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
  365             }
  366             // closing bracket
  367             elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
  368                 if (empty($this->brackets)) {
  369                     throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  370                 }
  371 
  372                 list($expect, $lineno) = array_pop($this->brackets);
  373                 if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
  374                     throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  375                 }
  376             }
  377 
  378             $this->pushToken(Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
  379             ++$this->cursor;
  380         }
  381         // strings
  382         elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
  383             $this->pushToken(Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
  384             $this->moveCursor($match[0]);
  385         }
  386         // opening double quoted string
  387         elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
  388             $this->brackets[] = ['"', $this->lineno];
  389             $this->pushState(self::STATE_STRING);
  390             $this->moveCursor($match[0]);
  391         }
  392         // unlexable
  393         else {
  394             throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  395         }
  396     }
  397 
  398     protected function lexRawData($tag)
  399     {
  400         if ('raw' === $tag) {
  401             @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
  402         }
  403 
  404         if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
  405             throw new SyntaxError(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->source);
  406         }
  407 
  408         $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
  409         $this->moveCursor($text.$match[0][0]);
  410 
  411         // trim?
  412         if (isset($match[1][0])) {
  413             if ($this->options['whitespace_trim'] === $match[1][0]) {
  414                 // whitespace_trim detected ({%-, {{- or {#-)
  415                 $text = rtrim($text);
  416             } else {
  417                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  418                 // don't trim \r and \n
  419                 $text = rtrim($text, " \t\0\x0B");
  420             }
  421         }
  422 
  423         $this->pushToken(Token::TEXT_TYPE, $text);
  424     }
  425 
  426     protected function lexComment()
  427     {
  428         if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
  429             throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
  430         }
  431 
  432         $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
  433     }
  434 
  435     protected function lexString()
  436     {
  437         if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
  438             $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
  439             $this->pushToken(Token::INTERPOLATION_START_TYPE);
  440             $this->moveCursor($match[0]);
  441             $this->pushState(self::STATE_INTERPOLATION);
  442         } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
  443             $this->pushToken(Token::STRING_TYPE, stripcslashes($match[0]));
  444             $this->moveCursor($match[0]);
  445         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
  446             list($expect, $lineno) = array_pop($this->brackets);
  447             if ('"' != $this->code[$this->cursor]) {
  448                 throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  449             }
  450 
  451             $this->popState();
  452             ++$this->cursor;
  453         } else {
  454             // unlexable
  455             throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  456         }
  457     }
  458 
  459     protected function lexInterpolation()
  460     {
  461         $bracket = end($this->brackets);
  462         if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
  463             array_pop($this->brackets);
  464             $this->pushToken(Token::INTERPOLATION_END_TYPE);
  465             $this->moveCursor($match[0]);
  466             $this->popState();
  467         } else {
  468             $this->lexExpression();
  469         }
  470     }
  471 
  472     protected function pushToken($type, $value = '')
  473     {
  474         // do not push empty text tokens
  475         if (Token::TEXT_TYPE === $type && '' === $value) {
  476             return;
  477         }
  478 
  479         $this->tokens[] = new Token($type, $value, $this->lineno);
  480     }
  481 
  482     protected function moveCursor($text)
  483     {
  484         $this->cursor += \strlen($text);
  485         $this->lineno += substr_count($text, "\n");
  486     }
  487 
  488     protected function getOperatorRegex()
  489     {
  490         $operators = array_merge(
  491             ['='],
  492             array_keys($this->env->getUnaryOperators()),
  493             array_keys($this->env->getBinaryOperators())
  494         );
  495 
  496         $operators = array_combine($operators, array_map('strlen', $operators));
  497         arsort($operators);
  498 
  499         $regex = [];
  500         foreach ($operators as $operator => $length) {
  501             // an operator that ends with a character must be followed by
  502             // a whitespace or a parenthesis
  503             if (ctype_alpha($operator[$length - 1])) {
  504                 $r = preg_quote($operator, '/').'(?=[\s()])';
  505             } else {
  506                 $r = preg_quote($operator, '/');
  507             }
  508 
  509             // an operator with a space can be any amount of whitespaces
  510             $r = preg_replace('/\s+/', '\s+', $r);
  511 
  512             $regex[] = $r;
  513         }
  514 
  515         return '/'.implode('|', $regex).'/A';
  516     }
  517 
  518     protected function pushState($state)
  519     {
  520         $this->states[] = $this->state;
  521         $this->state = $state;
  522     }
  523 
  524     protected function popState()
  525     {
  526         if (0 === \count($this->states)) {
  527             throw new \LogicException('Cannot pop state without a previous state.');
  528         }
  529 
  530         $this->state = array_pop($this->states);
  531     }
  532 }
  533 
  534 class_alias('Twig\Lexer', 'Twig_Lexer');