"Fossies" - the Fresh Open Source Software Archive

Member "yii-1.1.22.bf1d26/framework/vendors/htmlpurifier/HTMLPurifier.standalone.php" (16 Jan 2020, 700552 Bytes) of package /linux/www/yii-1.1.22.bf1d26.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "HTMLPurifier.standalone.php": 1.1.21.733ac5_vs_1.1.22.bf1d26.

    1 <?php
    2 
    3 /**
    4  * @file
    5  * This file was auto-generated by generate-includes.php and includes all of
    6  * the core files required by HTML Purifier. Use this if performance is a
    7  * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
    8  * FILE, changes will be overwritten the next time the script is run.
    9  *
   10  * @version 4.10.0
   11  *
   12  * @warning
   13  *      You must *not* include any other HTML Purifier files before this file,
   14  *      because 'require' not 'require_once' is used.
   15  *
   16  * @warning
   17  *      This file requires that the include path contains the HTML Purifier
   18  *      library directory; this is not auto-set.
   19  */
   20 
   21 
   22 
   23 /*! @mainpage
   24  *
   25  * HTML Purifier is an HTML filter that will take an arbitrary snippet of
   26  * HTML and rigorously test, validate and filter it into a version that
   27  * is safe for output onto webpages. It achieves this by:
   28  *
   29  *  -# Lexing (parsing into tokens) the document,
   30  *  -# Executing various strategies on the tokens:
   31  *      -# Removing all elements not in the whitelist,
   32  *      -# Making the tokens well-formed,
   33  *      -# Fixing the nesting of the nodes, and
   34  *      -# Validating attributes of the nodes; and
   35  *  -# Generating HTML from the purified tokens.
   36  *
   37  * However, most users will only need to interface with the HTMLPurifier
   38  * and HTMLPurifier_Config.
   39  */
   40 
   41 /*
   42     HTML Purifier 4.10.0 - Standards Compliant HTML Filtering
   43     Copyright (C) 2006-2008 Edward Z. Yang
   44 
   45     This library is free software; you can redistribute it and/or
   46     modify it under the terms of the GNU Lesser General Public
   47     License as published by the Free Software Foundation; either
   48     version 2.1 of the License, or (at your option) any later version.
   49 
   50     This library is distributed in the hope that it will be useful,
   51     but WITHOUT ANY WARRANTY; without even the implied warranty of
   52     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   53     Lesser General Public License for more details.
   54 
   55     You should have received a copy of the GNU Lesser General Public
   56     License along with this library; if not, write to the Free Software
   57     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
   58  */
   59 
   60 /**
   61  * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
   62  *
   63  * @note There are several points in which configuration can be specified
   64  *       for HTML Purifier.  The precedence of these (from lowest to
   65  *       highest) is as follows:
   66  *          -# Instance: new HTMLPurifier($config)
   67  *          -# Invocation: purify($html, $config)
   68  *       These configurations are entirely independent of each other and
   69  *       are *not* merged (this behavior may change in the future).
   70  *
   71  * @todo We need an easier way to inject strategies using the configuration
   72  *       object.
   73  */
   74 class HTMLPurifier
   75 {
   76 
   77     /**
   78      * Version of HTML Purifier.
   79      * @type string
   80      */
   81     public $version = '4.10.0';
   82 
   83     /**
   84      * Constant with version of HTML Purifier.
   85      */
   86     const VERSION = '4.10.0';
   87 
   88     /**
   89      * Global configuration object.
   90      * @type HTMLPurifier_Config
   91      */
   92     public $config;
   93 
   94     /**
   95      * Array of extra filter objects to run on HTML,
   96      * for backwards compatibility.
   97      * @type HTMLPurifier_Filter[]
   98      */
   99     private $filters = array();
  100 
  101     /**
  102      * Single instance of HTML Purifier.
  103      * @type HTMLPurifier
  104      */
  105     private static $instance;
  106 
  107     /**
  108      * @type HTMLPurifier_Strategy_Core
  109      */
  110     protected $strategy;
  111 
  112     /**
  113      * @type HTMLPurifier_Generator
  114      */
  115     protected $generator;
  116 
  117     /**
  118      * Resultant context of last run purification.
  119      * Is an array of contexts if the last called method was purifyArray().
  120      * @type HTMLPurifier_Context
  121      */
  122     public $context;
  123 
  124     /**
  125      * Initializes the purifier.
  126      *
  127      * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
  128      *                for all instances of the purifier, if omitted, a default
  129      *                configuration is supplied (which can be overridden on a
  130      *                per-use basis).
  131      *                The parameter can also be any type that
  132      *                HTMLPurifier_Config::create() supports.
  133      */
  134     public function __construct($config = null)
  135     {
  136         $this->config = HTMLPurifier_Config::create($config);
  137         $this->strategy = new HTMLPurifier_Strategy_Core();
  138     }
  139 
  140     /**
  141      * Adds a filter to process the output. First come first serve
  142      *
  143      * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
  144      */
  145     public function addFilter($filter)
  146     {
  147         trigger_error(
  148             'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
  149             ' in the Filter namespace or Filter.Custom',
  150             E_USER_WARNING
  151         );
  152         $this->filters[] = $filter;
  153     }
  154 
  155     /**
  156      * Filters an HTML snippet/document to be XSS-free and standards-compliant.
  157      *
  158      * @param string $html String of HTML to purify
  159      * @param HTMLPurifier_Config $config Config object for this operation,
  160      *                if omitted, defaults to the config object specified during this
  161      *                object's construction. The parameter can also be any type
  162      *                that HTMLPurifier_Config::create() supports.
  163      *
  164      * @return string Purified HTML
  165      */
  166     public function purify($html, $config = null)
  167     {
  168         // :TODO: make the config merge in, instead of replace
  169         $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
  170 
  171         // implementation is partially environment dependant, partially
  172         // configuration dependant
  173         $lexer = HTMLPurifier_Lexer::create($config);
  174 
  175         $context = new HTMLPurifier_Context();
  176 
  177         // setup HTML generator
  178         $this->generator = new HTMLPurifier_Generator($config, $context);
  179         $context->register('Generator', $this->generator);
  180 
  181         // set up global context variables
  182         if ($config->get('Core.CollectErrors')) {
  183             // may get moved out if other facilities use it
  184             $language_factory = HTMLPurifier_LanguageFactory::instance();
  185             $language = $language_factory->create($config, $context);
  186             $context->register('Locale', $language);
  187 
  188             $error_collector = new HTMLPurifier_ErrorCollector($context);
  189             $context->register('ErrorCollector', $error_collector);
  190         }
  191 
  192         // setup id_accumulator context, necessary due to the fact that
  193         // AttrValidator can be called from many places
  194         $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
  195         $context->register('IDAccumulator', $id_accumulator);
  196 
  197         $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
  198 
  199         // setup filters
  200         $filter_flags = $config->getBatch('Filter');
  201         $custom_filters = $filter_flags['Custom'];
  202         unset($filter_flags['Custom']);
  203         $filters = array();
  204         foreach ($filter_flags as $filter => $flag) {
  205             if (!$flag) {
  206                 continue;
  207             }
  208             if (strpos($filter, '.') !== false) {
  209                 continue;
  210             }
  211             $class = "HTMLPurifier_Filter_$filter";
  212             $filters[] = new $class;
  213         }
  214         foreach ($custom_filters as $filter) {
  215             // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
  216             $filters[] = $filter;
  217         }
  218         $filters = array_merge($filters, $this->filters);
  219         // maybe prepare(), but later
  220 
  221         for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
  222             $html = $filters[$i]->preFilter($html, $config, $context);
  223         }
  224 
  225         // purified HTML
  226         $html =
  227             $this->generator->generateFromTokens(
  228             // list of tokens
  229                 $this->strategy->execute(
  230                 // list of un-purified tokens
  231                     $lexer->tokenizeHTML(
  232                     // un-purified HTML
  233                         $html,
  234                         $config,
  235                         $context
  236                     ),
  237                     $config,
  238                     $context
  239                 )
  240             );
  241 
  242         for ($i = $filter_size - 1; $i >= 0; $i--) {
  243             $html = $filters[$i]->postFilter($html, $config, $context);
  244         }
  245 
  246         $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
  247         $this->context =& $context;
  248         return $html;
  249     }
  250 
  251     /**
  252      * Filters an array of HTML snippets
  253      *
  254      * @param string[] $array_of_html Array of html snippets
  255      * @param HTMLPurifier_Config $config Optional config object for this operation.
  256      *                See HTMLPurifier::purify() for more details.
  257      *
  258      * @return string[] Array of purified HTML
  259      */
  260     public function purifyArray($array_of_html, $config = null)
  261     {
  262         $context_array = array();
  263         foreach ($array_of_html as $key => $html) {
  264             $array_of_html[$key] = $this->purify($html, $config);
  265             $context_array[$key] = $this->context;
  266         }
  267         $this->context = $context_array;
  268         return $array_of_html;
  269     }
  270 
  271     /**
  272      * Singleton for enforcing just one HTML Purifier in your system
  273      *
  274      * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
  275      *                   HTMLPurifier instance to overload singleton with,
  276      *                   or HTMLPurifier_Config instance to configure the
  277      *                   generated version with.
  278      *
  279      * @return HTMLPurifier
  280      */
  281     public static function instance($prototype = null)
  282     {
  283         if (!self::$instance || $prototype) {
  284             if ($prototype instanceof HTMLPurifier) {
  285                 self::$instance = $prototype;
  286             } elseif ($prototype) {
  287                 self::$instance = new HTMLPurifier($prototype);
  288             } else {
  289                 self::$instance = new HTMLPurifier();
  290             }
  291         }
  292         return self::$instance;
  293     }
  294 
  295     /**
  296      * Singleton for enforcing just one HTML Purifier in your system
  297      *
  298      * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
  299      *                   HTMLPurifier instance to overload singleton with,
  300      *                   or HTMLPurifier_Config instance to configure the
  301      *                   generated version with.
  302      *
  303      * @return HTMLPurifier
  304      * @note Backwards compatibility, see instance()
  305      */
  306     public static function getInstance($prototype = null)
  307     {
  308         return HTMLPurifier::instance($prototype);
  309     }
  310 }
  311 
  312 
  313 
  314 
  315 
  316 /**
  317  * Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
  318  * and back again.
  319  *
  320  * @note This transformation is not an equivalence.  We mutate the input
  321  * token stream to make it so; see all [MUT] markers in code.
  322  */
  323 class HTMLPurifier_Arborize
  324 {
  325     public static function arborize($tokens, $config, $context) {
  326         $definition = $config->getHTMLDefinition();
  327         $parent = new HTMLPurifier_Token_Start($definition->info_parent);
  328         $stack = array($parent->toNode());
  329         foreach ($tokens as $token) {
  330             $token->skip = null; // [MUT]
  331             $token->carryover = null; // [MUT]
  332             if ($token instanceof HTMLPurifier_Token_End) {
  333                 $token->start = null; // [MUT]
  334                 $r = array_pop($stack);
  335                 //assert($r->name === $token->name);
  336                 //assert(empty($token->attr));
  337                 $r->endCol = $token->col;
  338                 $r->endLine = $token->line;
  339                 $r->endArmor = $token->armor;
  340                 continue;
  341             }
  342             $node = $token->toNode();
  343             $stack[count($stack)-1]->children[] = $node;
  344             if ($token instanceof HTMLPurifier_Token_Start) {
  345                 $stack[] = $node;
  346             }
  347         }
  348         //assert(count($stack) == 1);
  349         return $stack[0];
  350     }
  351 
  352     public static function flatten($node, $config, $context) {
  353         $level = 0;
  354         $nodes = array($level => new HTMLPurifier_Queue(array($node)));
  355         $closingTokens = array();
  356         $tokens = array();
  357         do {
  358             while (!$nodes[$level]->isEmpty()) {
  359                 $node = $nodes[$level]->shift(); // FIFO
  360                 list($start, $end) = $node->toTokenPair();
  361                 if ($level > 0) {
  362                     $tokens[] = $start;
  363                 }
  364                 if ($end !== NULL) {
  365                     $closingTokens[$level][] = $end;
  366                 }
  367                 if ($node instanceof HTMLPurifier_Node_Element) {
  368                     $level++;
  369                     $nodes[$level] = new HTMLPurifier_Queue();
  370                     foreach ($node->children as $childNode) {
  371                         $nodes[$level]->push($childNode);
  372                     }
  373                 }
  374             }
  375             $level--;
  376             if ($level && isset($closingTokens[$level])) {
  377                 while ($token = array_pop($closingTokens[$level])) {
  378                     $tokens[] = $token;
  379                 }
  380             }
  381         } while ($level > 0);
  382         return $tokens;
  383     }
  384 }
  385 
  386 
  387 
  388 /**
  389  * Defines common attribute collections that modules reference
  390  */
  391 
  392 class HTMLPurifier_AttrCollections
  393 {
  394 
  395     /**
  396      * Associative array of attribute collections, indexed by name.
  397      * @type array
  398      */
  399     public $info = array();
  400 
  401     /**
  402      * Performs all expansions on internal data for use by other inclusions
  403      * It also collects all attribute collection extensions from
  404      * modules
  405      * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
  406      * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
  407      */
  408     public function __construct($attr_types, $modules)
  409     {
  410         $this->doConstruct($attr_types, $modules);
  411     }
  412 
  413     public function doConstruct($attr_types, $modules)
  414     {
  415         // load extensions from the modules
  416         foreach ($modules as $module) {
  417             foreach ($module->attr_collections as $coll_i => $coll) {
  418                 if (!isset($this->info[$coll_i])) {
  419                     $this->info[$coll_i] = array();
  420                 }
  421                 foreach ($coll as $attr_i => $attr) {
  422                     if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
  423                         // merge in includes
  424                         $this->info[$coll_i][$attr_i] = array_merge(
  425                             $this->info[$coll_i][$attr_i],
  426                             $attr
  427                         );
  428                         continue;
  429                     }
  430                     $this->info[$coll_i][$attr_i] = $attr;
  431                 }
  432             }
  433         }
  434         // perform internal expansions and inclusions
  435         foreach ($this->info as $name => $attr) {
  436             // merge attribute collections that include others
  437             $this->performInclusions($this->info[$name]);
  438             // replace string identifiers with actual attribute objects
  439             $this->expandIdentifiers($this->info[$name], $attr_types);
  440         }
  441     }
  442 
  443     /**
  444      * Takes a reference to an attribute associative array and performs
  445      * all inclusions specified by the zero index.
  446      * @param array &$attr Reference to attribute array
  447      */
  448     public function performInclusions(&$attr)
  449     {
  450         if (!isset($attr[0])) {
  451             return;
  452         }
  453         $merge = $attr[0];
  454         $seen  = array(); // recursion guard
  455         // loop through all the inclusions
  456         for ($i = 0; isset($merge[$i]); $i++) {
  457             if (isset($seen[$merge[$i]])) {
  458                 continue;
  459             }
  460             $seen[$merge[$i]] = true;
  461             // foreach attribute of the inclusion, copy it over
  462             if (!isset($this->info[$merge[$i]])) {
  463                 continue;
  464             }
  465             foreach ($this->info[$merge[$i]] as $key => $value) {
  466                 if (isset($attr[$key])) {
  467                     continue;
  468                 } // also catches more inclusions
  469                 $attr[$key] = $value;
  470             }
  471             if (isset($this->info[$merge[$i]][0])) {
  472                 // recursion
  473                 $merge = array_merge($merge, $this->info[$merge[$i]][0]);
  474             }
  475         }
  476         unset($attr[0]);
  477     }
  478 
  479     /**
  480      * Expands all string identifiers in an attribute array by replacing
  481      * them with the appropriate values inside HTMLPurifier_AttrTypes
  482      * @param array &$attr Reference to attribute array
  483      * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
  484      */
  485     public function expandIdentifiers(&$attr, $attr_types)
  486     {
  487         // because foreach will process new elements we add, make sure we
  488         // skip duplicates
  489         $processed = array();
  490 
  491         foreach ($attr as $def_i => $def) {
  492             // skip inclusions
  493             if ($def_i === 0) {
  494                 continue;
  495             }
  496 
  497             if (isset($processed[$def_i])) {
  498                 continue;
  499             }
  500 
  501             // determine whether or not attribute is required
  502             if ($required = (strpos($def_i, '*') !== false)) {
  503                 // rename the definition
  504                 unset($attr[$def_i]);
  505                 $def_i = trim($def_i, '*');
  506                 $attr[$def_i] = $def;
  507             }
  508 
  509             $processed[$def_i] = true;
  510 
  511             // if we've already got a literal object, move on
  512             if (is_object($def)) {
  513                 // preserve previous required
  514                 $attr[$def_i]->required = ($required || $attr[$def_i]->required);
  515                 continue;
  516             }
  517 
  518             if ($def === false) {
  519                 unset($attr[$def_i]);
  520                 continue;
  521             }
  522 
  523             if ($t = $attr_types->get($def)) {
  524                 $attr[$def_i] = $t;
  525                 $attr[$def_i]->required = $required;
  526             } else {
  527                 unset($attr[$def_i]);
  528             }
  529         }
  530     }
  531 }
  532 
  533 
  534 
  535 
  536 
  537 /**
  538  * Base class for all validating attribute definitions.
  539  *
  540  * This family of classes forms the core for not only HTML attribute validation,
  541  * but also any sort of string that needs to be validated or cleaned (which
  542  * means CSS properties and composite definitions are defined here too).
  543  * Besides defining (through code) what precisely makes the string valid,
  544  * subclasses are also responsible for cleaning the code if possible.
  545  */
  546 
  547 abstract class HTMLPurifier_AttrDef
  548 {
  549 
  550     /**
  551      * Tells us whether or not an HTML attribute is minimized.
  552      * Has no meaning in other contexts.
  553      * @type bool
  554      */
  555     public $minimized = false;
  556 
  557     /**
  558      * Tells us whether or not an HTML attribute is required.
  559      * Has no meaning in other contexts
  560      * @type bool
  561      */
  562     public $required = false;
  563 
  564     /**
  565      * Validates and cleans passed string according to a definition.
  566      *
  567      * @param string $string String to be validated and cleaned.
  568      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
  569      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
  570      */
  571     abstract public function validate($string, $config, $context);
  572 
  573     /**
  574      * Convenience method that parses a string as if it were CDATA.
  575      *
  576      * This method process a string in the manner specified at
  577      * <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
  578      * leading and trailing whitespace, ignoring line feeds, and replacing
  579      * carriage returns and tabs with spaces.  While most useful for HTML
  580      * attributes specified as CDATA, it can also be applied to most CSS
  581      * values.
  582      *
  583      * @note This method is not entirely standards compliant, as trim() removes
  584      *       more types of whitespace than specified in the spec. In practice,
  585      *       this is rarely a problem, as those extra characters usually have
  586      *       already been removed by HTMLPurifier_Encoder.
  587      *
  588      * @warning This processing is inconsistent with XML's whitespace handling
  589      *          as specified by section 3.3.3 and referenced XHTML 1.0 section
  590      *          4.7.  However, note that we are NOT necessarily
  591      *          parsing XML, thus, this behavior may still be correct. We
  592      *          assume that newlines have been normalized.
  593      */
  594     public function parseCDATA($string)
  595     {
  596         $string = trim($string);
  597         $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
  598         return $string;
  599     }
  600 
  601     /**
  602      * Factory method for creating this class from a string.
  603      * @param string $string String construction info
  604      * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
  605      */
  606     public function make($string)
  607     {
  608         // default implementation, return a flyweight of this object.
  609         // If $string has an effect on the returned object (i.e. you
  610         // need to overload this method), it is best
  611         // to clone or instantiate new copies. (Instantiation is safer.)
  612         return $this;
  613     }
  614 
  615     /**
  616      * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
  617      * properly. THIS IS A HACK!
  618      * @param string $string a CSS colour definition
  619      * @return string
  620      */
  621     protected function mungeRgb($string)
  622     {
  623         $p = '\s*(\d+(\.\d+)?([%]?))\s*';
  624 
  625         if (preg_match('/(rgba|hsla)\(/', $string)) {
  626             return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
  627         }
  628 
  629         return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
  630     }
  631 
  632     /**
  633      * Parses a possibly escaped CSS string and returns the "pure"
  634      * version of it.
  635      */
  636     protected function expandCSSEscape($string)
  637     {
  638         // flexibly parse it
  639         $ret = '';
  640         for ($i = 0, $c = strlen($string); $i < $c; $i++) {
  641             if ($string[$i] === '\\') {
  642                 $i++;
  643                 if ($i >= $c) {
  644                     $ret .= '\\';
  645                     break;
  646                 }
  647                 if (ctype_xdigit($string[$i])) {
  648                     $code = $string[$i];
  649                     for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
  650                         if (!ctype_xdigit($string[$i])) {
  651                             break;
  652                         }
  653                         $code .= $string[$i];
  654                     }
  655                     // We have to be extremely careful when adding
  656                     // new characters, to make sure we're not breaking
  657                     // the encoding.
  658                     $char = HTMLPurifier_Encoder::unichr(hexdec($code));
  659                     if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
  660                         continue;
  661                     }
  662                     $ret .= $char;
  663                     if ($i < $c && trim($string[$i]) !== '') {
  664                         $i--;
  665                     }
  666                     continue;
  667                 }
  668                 if ($string[$i] === "\n") {
  669                     continue;
  670                 }
  671             }
  672             $ret .= $string[$i];
  673         }
  674         return $ret;
  675     }
  676 }
  677 
  678 
  679 
  680 
  681 
  682 /**
  683  * Processes an entire attribute array for corrections needing multiple values.
  684  *
  685  * Occasionally, a certain attribute will need to be removed and popped onto
  686  * another value.  Instead of creating a complex return syntax for
  687  * HTMLPurifier_AttrDef, we just pass the whole attribute array to a
  688  * specialized object and have that do the special work.  That is the
  689  * family of HTMLPurifier_AttrTransform.
  690  *
  691  * An attribute transformation can be assigned to run before or after
  692  * HTMLPurifier_AttrDef validation.  See HTMLPurifier_HTMLDefinition for
  693  * more details.
  694  */
  695 
  696 abstract class HTMLPurifier_AttrTransform
  697 {
  698 
  699     /**
  700      * Abstract: makes changes to the attributes dependent on multiple values.
  701      *
  702      * @param array $attr Assoc array of attributes, usually from
  703      *              HTMLPurifier_Token_Tag::$attr
  704      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
  705      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
  706      * @return array Processed attribute array.
  707      */
  708     abstract public function transform($attr, $config, $context);
  709 
  710     /**
  711      * Prepends CSS properties to the style attribute, creating the
  712      * attribute if it doesn't exist.
  713      * @param array &$attr Attribute array to process (passed by reference)
  714      * @param string $css CSS to prepend
  715      */
  716     public function prependCSS(&$attr, $css)
  717     {
  718         $attr['style'] = isset($attr['style']) ? $attr['style'] : '';
  719         $attr['style'] = $css . $attr['style'];
  720     }
  721 
  722     /**
  723      * Retrieves and removes an attribute
  724      * @param array &$attr Attribute array to process (passed by reference)
  725      * @param mixed $key Key of attribute to confiscate
  726      * @return mixed
  727      */
  728     public function confiscateAttr(&$attr, $key)
  729     {
  730         if (!isset($attr[$key])) {
  731             return null;
  732         }
  733         $value = $attr[$key];
  734         unset($attr[$key]);
  735         return $value;
  736     }
  737 }
  738 
  739 
  740 
  741 
  742 
  743 /**
  744  * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
  745  */
  746 class HTMLPurifier_AttrTypes
  747 {
  748     /**
  749      * Lookup array of attribute string identifiers to concrete implementations.
  750      * @type HTMLPurifier_AttrDef[]
  751      */
  752     protected $info = array();
  753 
  754     /**
  755      * Constructs the info array, supplying default implementations for attribute
  756      * types.
  757      */
  758     public function __construct()
  759     {
  760         // XXX This is kind of poor, since we don't actually /clone/
  761         // instances; instead, we use the supplied make() attribute. So,
  762         // the underlying class must know how to deal with arguments.
  763         // With the old implementation of Enum, that ignored its
  764         // arguments when handling a make dispatch, the IAlign
  765         // definition wouldn't work.
  766 
  767         // pseudo-types, must be instantiated via shorthand
  768         $this->info['Enum']    = new HTMLPurifier_AttrDef_Enum();
  769         $this->info['Bool']    = new HTMLPurifier_AttrDef_HTML_Bool();
  770 
  771         $this->info['CDATA']    = new HTMLPurifier_AttrDef_Text();
  772         $this->info['ID']       = new HTMLPurifier_AttrDef_HTML_ID();
  773         $this->info['Length']   = new HTMLPurifier_AttrDef_HTML_Length();
  774         $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
  775         $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
  776         $this->info['Pixels']   = new HTMLPurifier_AttrDef_HTML_Pixels();
  777         $this->info['Text']     = new HTMLPurifier_AttrDef_Text();
  778         $this->info['URI']      = new HTMLPurifier_AttrDef_URI();
  779         $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
  780         $this->info['Color']    = new HTMLPurifier_AttrDef_HTML_Color();
  781         $this->info['IAlign']   = self::makeEnum('top,middle,bottom,left,right');
  782         $this->info['LAlign']   = self::makeEnum('top,bottom,left,right');
  783         $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
  784 
  785         // unimplemented aliases
  786         $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
  787         $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
  788         $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
  789         $this->info['Character'] = new HTMLPurifier_AttrDef_Text();
  790 
  791         // "proprietary" types
  792         $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
  793 
  794         // number is really a positive integer (one or more digits)
  795         // FIXME: ^^ not always, see start and value of list items
  796         $this->info['Number']   = new HTMLPurifier_AttrDef_Integer(false, false, true);
  797     }
  798 
  799     private static function makeEnum($in)
  800     {
  801         return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
  802     }
  803 
  804     /**
  805      * Retrieves a type
  806      * @param string $type String type name
  807      * @return HTMLPurifier_AttrDef Object AttrDef for type
  808      */
  809     public function get($type)
  810     {
  811         // determine if there is any extra info tacked on
  812         if (strpos($type, '#') !== false) {
  813             list($type, $string) = explode('#', $type, 2);
  814         } else {
  815             $string = '';
  816         }
  817 
  818         if (!isset($this->info[$type])) {
  819             trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
  820             return;
  821         }
  822         return $this->info[$type]->make($string);
  823     }
  824 
  825     /**
  826      * Sets a new implementation for a type
  827      * @param string $type String type name
  828      * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
  829      */
  830     public function set($type, $impl)
  831     {
  832         $this->info[$type] = $impl;
  833     }
  834 }
  835 
  836 
  837 
  838 
  839 
  840 /**
  841  * Validates the attributes of a token. Doesn't manage required attributes
  842  * very well. The only reason we factored this out was because RemoveForeignElements
  843  * also needed it besides ValidateAttributes.
  844  */
  845 class HTMLPurifier_AttrValidator
  846 {
  847 
  848     /**
  849      * Validates the attributes of a token, mutating it as necessary.
  850      * that has valid tokens
  851      * @param HTMLPurifier_Token $token Token to validate.
  852      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
  853      * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
  854      */
  855     public function validateToken($token, $config, $context)
  856     {
  857         $definition = $config->getHTMLDefinition();
  858         $e =& $context->get('ErrorCollector', true);
  859 
  860         // initialize IDAccumulator if necessary
  861         $ok =& $context->get('IDAccumulator', true);
  862         if (!$ok) {
  863             $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
  864             $context->register('IDAccumulator', $id_accumulator);
  865         }
  866 
  867         // initialize CurrentToken if necessary
  868         $current_token =& $context->get('CurrentToken', true);
  869         if (!$current_token) {
  870             $context->register('CurrentToken', $token);
  871         }
  872 
  873         if (!$token instanceof HTMLPurifier_Token_Start &&
  874             !$token instanceof HTMLPurifier_Token_Empty
  875         ) {
  876             return;
  877         }
  878 
  879         // create alias to global definition array, see also $defs
  880         // DEFINITION CALL
  881         $d_defs = $definition->info_global_attr;
  882 
  883         // don't update token until the very end, to ensure an atomic update
  884         $attr = $token->attr;
  885 
  886         // do global transformations (pre)
  887         // nothing currently utilizes this
  888         foreach ($definition->info_attr_transform_pre as $transform) {
  889             $attr = $transform->transform($o = $attr, $config, $context);
  890             if ($e) {
  891                 if ($attr != $o) {
  892                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  893                 }
  894             }
  895         }
  896 
  897         // do local transformations only applicable to this element (pre)
  898         // ex. <p align="right"> to <p style="text-align:right;">
  899         foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
  900             $attr = $transform->transform($o = $attr, $config, $context);
  901             if ($e) {
  902                 if ($attr != $o) {
  903                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  904                 }
  905             }
  906         }
  907 
  908         // create alias to this element's attribute definition array, see
  909         // also $d_defs (global attribute definition array)
  910         // DEFINITION CALL
  911         $defs = $definition->info[$token->name]->attr;
  912 
  913         $attr_key = false;
  914         $context->register('CurrentAttr', $attr_key);
  915 
  916         // iterate through all the attribute keypairs
  917         // Watch out for name collisions: $key has previously been used
  918         foreach ($attr as $attr_key => $value) {
  919 
  920             // call the definition
  921             if (isset($defs[$attr_key])) {
  922                 // there is a local definition defined
  923                 if ($defs[$attr_key] === false) {
  924                     // We've explicitly been told not to allow this element.
  925                     // This is usually when there's a global definition
  926                     // that must be overridden.
  927                     // Theoretically speaking, we could have a
  928                     // AttrDef_DenyAll, but this is faster!
  929                     $result = false;
  930                 } else {
  931                     // validate according to the element's definition
  932                     $result = $defs[$attr_key]->validate(
  933                         $value,
  934                         $config,
  935                         $context
  936                     );
  937                 }
  938             } elseif (isset($d_defs[$attr_key])) {
  939                 // there is a global definition defined, validate according
  940                 // to the global definition
  941                 $result = $d_defs[$attr_key]->validate(
  942                     $value,
  943                     $config,
  944                     $context
  945                 );
  946             } else {
  947                 // system never heard of the attribute? DELETE!
  948                 $result = false;
  949             }
  950 
  951             // put the results into effect
  952             if ($result === false || $result === null) {
  953                 // this is a generic error message that should replaced
  954                 // with more specific ones when possible
  955                 if ($e) {
  956                     $e->send(E_ERROR, 'AttrValidator: Attribute removed');
  957                 }
  958 
  959                 // remove the attribute
  960                 unset($attr[$attr_key]);
  961             } elseif (is_string($result)) {
  962                 // generally, if a substitution is happening, there
  963                 // was some sort of implicit correction going on. We'll
  964                 // delegate it to the attribute classes to say exactly what.
  965 
  966                 // simple substitution
  967                 $attr[$attr_key] = $result;
  968             } else {
  969                 // nothing happens
  970             }
  971 
  972             // we'd also want slightly more complicated substitution
  973             // involving an array as the return value,
  974             // although we're not sure how colliding attributes would
  975             // resolve (certain ones would be completely overriden,
  976             // others would prepend themselves).
  977         }
  978 
  979         $context->destroy('CurrentAttr');
  980 
  981         // post transforms
  982 
  983         // global (error reporting untested)
  984         foreach ($definition->info_attr_transform_post as $transform) {
  985             $attr = $transform->transform($o = $attr, $config, $context);
  986             if ($e) {
  987                 if ($attr != $o) {
  988                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  989                 }
  990             }
  991         }
  992 
  993         // local (error reporting untested)
  994         foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
  995             $attr = $transform->transform($o = $attr, $config, $context);
  996             if ($e) {
  997                 if ($attr != $o) {
  998                     $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
  999                 }
 1000             }
 1001         }
 1002 
 1003         $token->attr = $attr;
 1004 
 1005         // destroy CurrentToken if we made it ourselves
 1006         if (!$current_token) {
 1007             $context->destroy('CurrentToken');
 1008         }
 1009 
 1010     }
 1011 
 1012 
 1013 }
 1014 
 1015 
 1016 
 1017 
 1018 
 1019 // constants are slow, so we use as few as possible
 1020 if (!defined('HTMLPURIFIER_PREFIX')) {
 1021     define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
 1022     set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
 1023 }
 1024 
 1025 // accomodations for versions earlier than 5.0.2
 1026 // borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
 1027 if (!defined('PHP_EOL')) {
 1028     switch (strtoupper(substr(PHP_OS, 0, 3))) {
 1029         case 'WIN':
 1030             define('PHP_EOL', "\r\n");
 1031             break;
 1032         case 'DAR':
 1033             define('PHP_EOL', "\r");
 1034             break;
 1035         default:
 1036             define('PHP_EOL', "\n");
 1037     }
 1038 }
 1039 
 1040 /**
 1041  * Bootstrap class that contains meta-functionality for HTML Purifier such as
 1042  * the autoload function.
 1043  *
 1044  * @note
 1045  *      This class may be used without any other files from HTML Purifier.
 1046  */
 1047 class HTMLPurifier_Bootstrap
 1048 {
 1049 
 1050     /**
 1051      * Autoload function for HTML Purifier
 1052      * @param string $class Class to load
 1053      * @return bool
 1054      */
 1055     public static function autoload($class)
 1056     {
 1057         $file = HTMLPurifier_Bootstrap::getPath($class);
 1058         if (!$file) {
 1059             return false;
 1060         }
 1061         // Technically speaking, it should be ok and more efficient to
 1062         // just do 'require', but Antonio Parraga reports that with
 1063         // Zend extensions such as Zend debugger and APC, this invariant
 1064         // may be broken.  Since we have efficient alternatives, pay
 1065         // the cost here and avoid the bug.
 1066         require_once HTMLPURIFIER_PREFIX . '/' . $file;
 1067         return true;
 1068     }
 1069 
 1070     /**
 1071      * Returns the path for a specific class.
 1072      * @param string $class Class path to get
 1073      * @return string
 1074      */
 1075     public static function getPath($class)
 1076     {
 1077         if (strncmp('HTMLPurifier', $class, 12) !== 0) {
 1078             return false;
 1079         }
 1080         // Custom implementations
 1081         if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
 1082             $code = str_replace('_', '-', substr($class, 22));
 1083             $file = 'HTMLPurifier/Language/classes/' . $code . '.php';
 1084         } else {
 1085             $file = str_replace('_', '/', $class) . '.php';
 1086         }
 1087         if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) {
 1088             return false;
 1089         }
 1090         return $file;
 1091     }
 1092 
 1093     /**
 1094      * "Pre-registers" our autoloader on the SPL stack.
 1095      */
 1096     public static function registerAutoload()
 1097     {
 1098         $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
 1099         if (($funcs = spl_autoload_functions()) === false) {
 1100             spl_autoload_register($autoload);
 1101         } elseif (function_exists('spl_autoload_unregister')) {
 1102             if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
 1103                 // prepend flag exists, no need for shenanigans
 1104                 spl_autoload_register($autoload, true, true);
 1105             } else {
 1106                 $buggy  = version_compare(PHP_VERSION, '5.2.11', '<');
 1107                 $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
 1108                     version_compare(PHP_VERSION, '5.1.0', '>=');
 1109                 foreach ($funcs as $func) {
 1110                     if ($buggy && is_array($func)) {
 1111                         // :TRICKY: There are some compatibility issues and some
 1112                         // places where we need to error out
 1113                         $reflector = new ReflectionMethod($func[0], $func[1]);
 1114                         if (!$reflector->isStatic()) {
 1115                             throw new Exception(
 1116                                 'HTML Purifier autoloader registrar is not compatible
 1117                                 with non-static object methods due to PHP Bug #44144;
 1118                                 Please do not use HTMLPurifier.autoload.php (or any
 1119                                 file that includes this file); instead, place the code:
 1120                                 spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
 1121                                 after your own autoloaders.'
 1122                             );
 1123                         }
 1124                         // Suprisingly, spl_autoload_register supports the
 1125                         // Class::staticMethod callback format, although call_user_func doesn't
 1126                         if ($compat) {
 1127                             $func = implode('::', $func);
 1128                         }
 1129                     }
 1130                     spl_autoload_unregister($func);
 1131                 }
 1132                 spl_autoload_register($autoload);
 1133                 foreach ($funcs as $func) {
 1134                     spl_autoload_register($func);
 1135                 }
 1136             }
 1137         }
 1138     }
 1139 }
 1140 
 1141 
 1142 
 1143 
 1144 
 1145 /**
 1146  * Super-class for definition datatype objects, implements serialization
 1147  * functions for the class.
 1148  */
 1149 abstract class HTMLPurifier_Definition
 1150 {
 1151 
 1152     /**
 1153      * Has setup() been called yet?
 1154      * @type bool
 1155      */
 1156     public $setup = false;
 1157 
 1158     /**
 1159      * If true, write out the final definition object to the cache after
 1160      * setup.  This will be true only if all invocations to get a raw
 1161      * definition object are also optimized.  This does not cause file
 1162      * system thrashing because on subsequent calls the cached object
 1163      * is used and any writes to the raw definition object are short
 1164      * circuited.  See enduser-customize.html for the high-level
 1165      * picture.
 1166      * @type bool
 1167      */
 1168     public $optimized = null;
 1169 
 1170     /**
 1171      * What type of definition is it?
 1172      * @type string
 1173      */
 1174     public $type;
 1175 
 1176     /**
 1177      * Sets up the definition object into the final form, something
 1178      * not done by the constructor
 1179      * @param HTMLPurifier_Config $config
 1180      */
 1181     abstract protected function doSetup($config);
 1182 
 1183     /**
 1184      * Setup function that aborts if already setup
 1185      * @param HTMLPurifier_Config $config
 1186      */
 1187     public function setup($config)
 1188     {
 1189         if ($this->setup) {
 1190             return;
 1191         }
 1192         $this->setup = true;
 1193         $this->doSetup($config);
 1194     }
 1195 }
 1196 
 1197 
 1198 
 1199 
 1200 
 1201 /**
 1202  * Defines allowed CSS attributes and what their values are.
 1203  * @see HTMLPurifier_HTMLDefinition
 1204  */
 1205 class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
 1206 {
 1207 
 1208     public $type = 'CSS';
 1209 
 1210     /**
 1211      * Assoc array of attribute name to definition object.
 1212      * @type HTMLPurifier_AttrDef[]
 1213      */
 1214     public $info = array();
 1215 
    /**
     * Constructs the info array.  The meat of this class.
     *
     * Registers, in $this->info, the definition used to validate the value
     * of each supported CSS property. After the always-available properties
     * come the optional groups enabled by the CSS.Proprietary,
     * CSS.AllowTricky and CSS.Trusted directives. Finally every registered
     * definition is wrapped in HTMLPurifier_AttrDef_CSS_ImportantDecorator
     * and setupConfigStuff() is called.
     *
     * Statement order matters here: several definitions are constructed
     * from $config after the properties they build on have been registered
     * (see the notes on 'font' and 'border' below).
     *
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
            array('left', 'right', 'center', 'justify'),
            false
        );

        // Chained assignment: one Enum instance is shared by the four
        // per-side properties and kept in $border_style for reuse below.
        $border_style =
        $this->info['border-bottom-style'] =
        $this->info['border-right-style'] =
        $this->info['border-left-style'] =
        $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
            array(
                'none',
                'hidden',
                'dotted',
                'dashed',
                'solid',
                'double',
                'groove',
                'ridge',
                'inset',
                'outset'
            ),
            false
        );

        // shorthand built on top of the per-side definition
        $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);

        $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
            array('none', 'left', 'right', 'both'),
            false
        );
        $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
            array('none', 'left', 'right'),
            false
        );
        $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
            array('normal', 'italic', 'oblique'),
            false
        );
        $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
            array('normal', 'small-caps'),
            false
        );

        // shared by list-style-image and background-image below
        $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('none')),
                new HTMLPurifier_AttrDef_CSS_URI()
            )
        );

        $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
            array('inside', 'outside'),
            false
        );
        $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
            array(
                'disc',
                'circle',
                'square',
                'decimal',
                'lower-roman',
                'upper-roman',
                'lower-alpha',
                'upper-alpha',
                'none'
            ),
            false
        );
        $this->info['list-style-image'] = $uri_or_none;

        // NOTE(review): built from $config right after the list-style-*
        // properties; presumably it looks those up, like 'font' below --
        // confirm before reordering.
        $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);

        $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
            array('capitalize', 'uppercase', 'lowercase', 'none'),
            false
        );
        $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();

        $this->info['background-image'] = $uri_or_none;
        $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
            array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
        );
        $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
            array('scroll', 'fixed')
        );
        $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();

        // one shared instance: 'transparent' or a color, used for the four
        // border sides and for background-color
        $border_color =
        $this->info['border-top-color'] =
        $this->info['border-bottom-color'] =
        $this->info['border-left-color'] =
        $this->info['border-right-color'] =
        $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('transparent')),
                new HTMLPurifier_AttrDef_CSS_Color()
            )
        );

        // NOTE(review): built from $config after the background-*
        // properties above; presumably depends on them -- confirm before
        // reordering.
        $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);

        $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);

        // one shared instance for the four per-side widths
        $border_width =
        $this->info['border-top-width'] =
        $this->info['border-bottom-width'] =
        $this->info['border-left-width'] =
        $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
                new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
            )
        );

        $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);

        $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('normal')),
                new HTMLPurifier_AttrDef_CSS_Length()
            )
        );

        $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('normal')),
                new HTMLPurifier_AttrDef_CSS_Length()
            )
        );

        $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(
                    array(
                        'xx-small',
                        'x-small',
                        'small',
                        'medium',
                        'large',
                        'x-large',
                        'xx-large',
                        'larger',
                        'smaller'
                    )
                ),
                new HTMLPurifier_AttrDef_CSS_Percentage(),
                new HTMLPurifier_AttrDef_CSS_Length()
            )
        );

        $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('normal')),
                new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true)
            )
        );

        // one shared instance for the four per-side margins
        $margin =
        $this->info['margin-top'] =
        $this->info['margin-bottom'] =
        $this->info['margin-left'] =
        $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage(),
                new HTMLPurifier_AttrDef_Enum(array('auto'))
            )
        );

        $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);

        // non-negative
        $padding =
        $this->info['padding-top'] =
        $this->info['padding-bottom'] =
        $this->info['padding-left'] =
        $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true)
            )
        );

        $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);

        $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage()
            )
        );

        // width/height definition used wherever no image limit applies
        $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_CSS_Length('0'),
                new HTMLPurifier_AttrDef_CSS_Percentage(true),
                new HTMLPurifier_AttrDef_Enum(array('auto'))
            )
        );
        $max = $config->get('CSS.MaxImgLength');

        // With no CSS.MaxImgLength every element uses $trusted_wh. Otherwise
        // a Switch keyed on 'img' is used: the img branch accepts only a
        // Length built with ('0', $max) or 'auto' (no percentage), and the
        // other branch is $trusted_wh. All six properties share the result.
        $this->info['min-width'] =
        $this->info['max-width'] =
        $this->info['min-height'] =
        $this->info['max-height'] =
        $this->info['width'] =
        $this->info['height'] =
            $max === null ?
                $trusted_wh :
                new HTMLPurifier_AttrDef_Switch(
                    'img',
                    // For img tags:
                    new HTMLPurifier_AttrDef_CSS_Composite(
                        array(
                            new HTMLPurifier_AttrDef_CSS_Length('0', $max),
                            new HTMLPurifier_AttrDef_Enum(array('auto'))
                        )
                    ),
                    // For everyone else:
                    $trusted_wh
                );

        $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();

        $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();

        // this could use specialized code
        $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
            array(
                'normal',
                'bold',
                'bolder',
                'lighter',
                '100',
                '200',
                '300',
                '400',
                '500',
                '600',
                '700',
                '800',
                '900'
            ),
            false
        );

        // MUST be called after other font properties, as it references
        // a CSSDefinition object
        $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);

        // same here; one Border instance is shared by the shorthand and the
        // four per-side shorthands
        $this->info['border'] =
        $this->info['border-bottom'] =
        $this->info['border-top'] =
        $this->info['border-left'] =
        $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);

        $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
            array('collapse', 'separate')
        );

        $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
            array('top', 'bottom')
        );

        $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
            array('auto', 'fixed')
        );

        $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(
                    array(
                        'baseline',
                        'sub',
                        'super',
                        'top',
                        'text-top',
                        'middle',
                        'bottom',
                        'text-bottom'
                    )
                ),
                new HTMLPurifier_AttrDef_CSS_Length(),
                new HTMLPurifier_AttrDef_CSS_Percentage()
            )
        );

        $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);

        // These CSS properties don't work on many browsers, but we live
        // in THE FUTURE!
        $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
            array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
        );

        // optional property groups, each gated by its own directive
        if ($config->get('CSS.Proprietary')) {
            $this->doSetupProprietary($config);
        }

        if ($config->get('CSS.AllowTricky')) {
            $this->doSetupTricky($config);
        }

        if ($config->get('CSS.Trusted')) {
            $this->doSetupTrusted($config);
        }

        $allow_important = $config->get('CSS.AllowImportant');
        // wrap all attr-defs with decorator that handles !important
        // (each property gets its own decorator object, even where the
        // wrapped definition is shared between properties)
        foreach ($this->info as $k => $v) {
            $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
        }

        $this->setupConfigStuff($config);
    }
 1541 
 1542     /**
 1543      * @param HTMLPurifier_Config $config
 1544      */
 1545     protected function doSetupProprietary($config)
 1546     {
 1547         // Internet Explorer only scrollbar colors
 1548         $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1549         $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1550         $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1551         $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1552         $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1553         $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
 1554 
 1555         // vendor specific prefixes of opacity
 1556         $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1557         $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1558 
 1559         // only opacity, for now
 1560         $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
 1561 
 1562         // more CSS3
 1563         $this->info['page-break-after'] =
 1564         $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(
 1565             array(
 1566                 'auto',
 1567                 'always',
 1568                 'avoid',
 1569                 'left',
 1570                 'right'
 1571             )
 1572         );
 1573         $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
 1574 
 1575         $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
 1576             array(
 1577                 new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
 1578                 new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
 1579             ));
 1580 
 1581         $this->info['border-top-left-radius'] =
 1582         $this->info['border-top-right-radius'] =
 1583         $this->info['border-bottom-right-radius'] =
 1584         $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
 1585         // TODO: support SLASH syntax
 1586         $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
 1587 
 1588     }
 1589 
 1590     /**
 1591      * @param HTMLPurifier_Config $config
 1592      */
 1593     protected function doSetupTricky($config)
 1594     {
 1595         $this->info['display'] = new HTMLPurifier_AttrDef_Enum(
 1596             array(
 1597                 'inline',
 1598                 'block',
 1599                 'list-item',
 1600                 'run-in',
 1601                 'compact',
 1602                 'marker',
 1603                 'table',
 1604                 'inline-block',
 1605                 'inline-table',
 1606                 'table-row-group',
 1607                 'table-header-group',
 1608                 'table-footer-group',
 1609                 'table-row',
 1610                 'table-column-group',
 1611                 'table-column',
 1612                 'table-cell',
 1613                 'table-caption',
 1614                 'none'
 1615             )
 1616         );
 1617         $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(
 1618             array('visible', 'hidden', 'collapse')
 1619         );
 1620         $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
 1621         $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
 1622     }
 1623 
 1624     /**
 1625      * @param HTMLPurifier_Config $config
 1626      */
 1627     protected function doSetupTrusted($config)
 1628     {
 1629         $this->info['position'] = new HTMLPurifier_AttrDef_Enum(
 1630             array('static', 'relative', 'absolute', 'fixed')
 1631         );
 1632         $this->info['top'] =
 1633         $this->info['left'] =
 1634         $this->info['right'] =
 1635         $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1636             array(
 1637                 new HTMLPurifier_AttrDef_CSS_Length(),
 1638                 new HTMLPurifier_AttrDef_CSS_Percentage(),
 1639                 new HTMLPurifier_AttrDef_Enum(array('auto')),
 1640             )
 1641         );
 1642         $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(
 1643             array(
 1644                 new HTMLPurifier_AttrDef_Integer(),
 1645                 new HTMLPurifier_AttrDef_Enum(array('auto')),
 1646             )
 1647         );
 1648     }
 1649 
 1650     /**
 1651      * Performs extra config-based processing. Based off of
 1652      * HTMLPurifier_HTMLDefinition.
 1653      * @param HTMLPurifier_Config $config
 1654      * @todo Refactor duplicate elements into common class (probably using
 1655      *       composition, not inheritance).
 1656      */
 1657     protected function setupConfigStuff($config)
 1658     {
 1659         // setup allowed elements
 1660         $support = "(for information on implementing this, see the " .
 1661             "support forums) ";
 1662         $allowed_properties = $config->get('CSS.AllowedProperties');
 1663         if ($allowed_properties !== null) {
 1664             foreach ($this->info as $name => $d) {
 1665                 if (!isset($allowed_properties[$name])) {
 1666                     unset($this->info[$name]);
 1667                 }
 1668                 unset($allowed_properties[$name]);
 1669             }
 1670             // emit errors
 1671             foreach ($allowed_properties as $name => $d) {
 1672                 // :TODO: Is this htmlspecialchars() call really necessary?
 1673                 $name = htmlspecialchars($name);
 1674                 trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
 1675             }
 1676         }
 1677 
 1678         $forbidden_properties = $config->get('CSS.ForbiddenProperties');
 1679         if ($forbidden_properties !== null) {
 1680             foreach ($this->info as $name => $d) {
 1681                 if (isset($forbidden_properties[$name])) {
 1682                     unset($this->info[$name]);
 1683                 }
 1684             }
 1685         }
 1686     }
 1687 }
 1688 
 1689 
 1690 
 1691 
 1692 
 1693 /**
 1694  * Defines allowed child nodes and validates nodes against it.
 1695  */
 1696 abstract class HTMLPurifier_ChildDef
 1697 {
 1698     /**
 1699      * Type of child definition, usually right-most part of class name lowercase.
 1700      * Used occasionally in terms of context.
 1701      * @type string
 1702      */
 1703     public $type;
 1704 
 1705     /**
 1706      * Indicates whether or not an empty array of children is okay.
 1707      *
 1708      * This is necessary for redundant checking when changes affecting
 1709      * a child node may cause a parent node to now be disallowed.
 1710      * @type bool
 1711      */
 1712     public $allow_empty;
 1713 
 1714     /**
 1715      * Lookup array of all elements that this definition could possibly allow.
 1716      * @type array
 1717      */
 1718     public $elements = array();
 1719 
 1720     /**
 1721      * Get lookup of tag names that should not close this element automatically.
 1722      * All other elements will do so.
 1723      * @param HTMLPurifier_Config $config HTMLPurifier_Config object
 1724      * @return array
 1725      */
 1726     public function getAllowedElements($config)
 1727     {
 1728         return $this->elements;
 1729     }
 1730 
 1731     /**
 1732      * Validates nodes according to definition and returns modification.
 1733      *
 1734      * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
 1735      * @param HTMLPurifier_Config $config HTMLPurifier_Config object
 1736      * @param HTMLPurifier_Context $context HTMLPurifier_Context object
 1737      * @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children
 1738      */
 1739     abstract public function validateChildren($children, $config, $context);
 1740 }
 1741 
 1742 
 1743 
 1744 
 1745 
 1746 /**
 1747  * Configuration object that triggers customizable behavior.
 1748  *
 1749  * @warning This class is strongly defined: that means that the class
 1750  *          will fail if an undefined directive is retrieved or set.
 1751  *
 1752  * @note Many classes that could (although many times don't) use the
 1753  *       configuration object make it a mandatory parameter.  This is
 1754  *       because a configuration object should always be forwarded,
 1755  *       otherwise, you run the risk of missing a parameter and then
 1756  *       being stumped when a configuration directive doesn't work.
 1757  *
 1758  * @todo Reconsider some of the public member variables
 1759  */
 1760 class HTMLPurifier_Config
 1761 {
 1762 
 1763     /**
 1764      * HTML Purifier's version
 1765      * @type string
 1766      */
 1767     public $version = '4.10.0';
 1768 
 1769     /**
 1770      * Whether or not to automatically finalize
 1771      * the object if a read operation is done.
 1772      * @type bool
 1773      */
 1774     public $autoFinalize = true;
 1775 
 1776     // protected member variables
 1777 
 1778     /**
 1779      * Namespace indexed array of serials for specific namespaces.
 1780      * @see getSerial() for more info.
 1781      * @type string[]
 1782      */
 1783     protected $serials = array();
 1784 
 1785     /**
 1786      * Serial for entire configuration object.
 1787      * @type string
 1788      */
 1789     protected $serial;
 1790 
 1791     /**
 1792      * Parser for variables.
 1793      * @type HTMLPurifier_VarParser_Flexible
 1794      */
 1795     protected $parser = null;
 1796 
 1797     /**
 1798      * Reference HTMLPurifier_ConfigSchema for value checking.
 1799      * @type HTMLPurifier_ConfigSchema
 1800      * @note This is public for introspective purposes. Please don't
 1801      *       abuse!
 1802      */
 1803     public $def;
 1804 
 1805     /**
 1806      * Indexed array of definitions.
 1807      * @type HTMLPurifier_Definition[]
 1808      */
 1809     protected $definitions;
 1810 
 1811     /**
 1812      * Whether or not config is finalized.
 1813      * @type bool
 1814      */
 1815     protected $finalized = false;
 1816 
 1817     /**
 1818      * Property list containing configuration directives.
 1819      * @type array
 1820      */
 1821     protected $plist;
 1822 
 1823     /**
 1824      * Whether or not a set is taking place due to an alias lookup.
 1825      * @type bool
 1826      */
 1827     private $aliasMode;
 1828 
 1829     /**
 1830      * Set to false if you do not want line and file numbers in errors.
 1831      * (useful when unit testing).  This will also compress some errors
 1832      * and exceptions.
 1833      * @type bool
 1834      */
 1835     public $chatty = true;
 1836 
 1837     /**
 1838      * Current lock; only gets to this namespace are allowed.
 1839      * @type string
 1840      */
 1841     private $lock;
 1842 
 1843     /**
 1844      * Constructor
 1845      * @param HTMLPurifier_ConfigSchema $definition ConfigSchema that defines
 1846      * what directives are allowed.
 1847      * @param HTMLPurifier_PropertyList $parent
 1848      */
 1849     public function __construct($definition, $parent = null)
 1850     {
 1851         $parent = $parent ? $parent : $definition->defaultPlist;
 1852         $this->plist = new HTMLPurifier_PropertyList($parent);
 1853         $this->def = $definition; // keep a copy around for checking
 1854         $this->parser = new HTMLPurifier_VarParser_Flexible();
 1855     }
 1856 
 1857     /**
 1858      * Convenience constructor that creates a config object based on a mixed var
 1859      * @param mixed $config Variable that defines the state of the config
 1860      *                      object. Can be: a HTMLPurifier_Config() object,
 1861      *                      an array of directives based on loadArray(),
 1862      *                      or a string filename of an ini file.
 1863      * @param HTMLPurifier_ConfigSchema $schema Schema object
 1864      * @return HTMLPurifier_Config Configured object
 1865      */
 1866     public static function create($config, $schema = null)
 1867     {
 1868         if ($config instanceof HTMLPurifier_Config) {
 1869             // pass-through
 1870             return $config;
 1871         }
 1872         if (!$schema) {
 1873             $ret = HTMLPurifier_Config::createDefault();
 1874         } else {
 1875             $ret = new HTMLPurifier_Config($schema);
 1876         }
 1877         if (is_string($config)) {
 1878             $ret->loadIni($config);
 1879         } elseif (is_array($config)) $ret->loadArray($config);
 1880         return $ret;
 1881     }
 1882 
 1883     /**
 1884      * Creates a new config object that inherits from a previous one.
 1885      * @param HTMLPurifier_Config $config Configuration object to inherit from.
 1886      * @return HTMLPurifier_Config object with $config as its parent.
 1887      */
 1888     public static function inherit(HTMLPurifier_Config $config)
 1889     {
 1890         return new HTMLPurifier_Config($config->def, $config->plist);
 1891     }
 1892 
 1893     /**
 1894      * Convenience constructor that creates a default configuration object.
 1895      * @return HTMLPurifier_Config default object.
 1896      */
 1897     public static function createDefault()
 1898     {
 1899         $definition = HTMLPurifier_ConfigSchema::instance();
 1900         $config = new HTMLPurifier_Config($definition);
 1901         return $config;
 1902     }
 1903 
 1904     /**
 1905      * Retrieves a value from the configuration.
 1906      *
 1907      * @param string $key String key
 1908      * @param mixed $a
 1909      *
 1910      * @return mixed
 1911      */
 1912     public function get($key, $a = null)
 1913     {
 1914         if ($a !== null) {
 1915             $this->triggerError(
 1916                 "Using deprecated API: use \$config->get('$key.$a') instead",
 1917                 E_USER_WARNING
 1918             );
 1919             $key = "$key.$a";
 1920         }
 1921         if (!$this->finalized) {
 1922             $this->autoFinalize();
 1923         }
 1924         if (!isset($this->def->info[$key])) {
 1925             // can't add % due to SimpleTest bug
 1926             $this->triggerError(
 1927                 'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
 1928                 E_USER_WARNING
 1929             );
 1930             return;
 1931         }
 1932         if (isset($this->def->info[$key]->isAlias)) {
 1933             $d = $this->def->info[$key];
 1934             $this->triggerError(
 1935                 'Cannot get value from aliased directive, use real name ' . $d->key,
 1936                 E_USER_ERROR
 1937             );
 1938             return;
 1939         }
 1940         if ($this->lock) {
 1941             list($ns) = explode('.', $key);
 1942             if ($ns !== $this->lock) {
 1943                 $this->triggerError(
 1944                     'Cannot get value of namespace ' . $ns . ' when lock for ' .
 1945                     $this->lock .
 1946                     ' is active, this probably indicates a Definition setup method ' .
 1947                     'is accessing directives that are not within its namespace',
 1948                     E_USER_ERROR
 1949                 );
 1950                 return;
 1951             }
 1952         }
 1953         return $this->plist->get($key);
 1954     }
 1955 
 1956     /**
 1957      * Retrieves an array of directives to values from a given namespace
 1958      *
 1959      * @param string $namespace String namespace
 1960      *
 1961      * @return array
 1962      */
 1963     public function getBatch($namespace)
 1964     {
 1965         if (!$this->finalized) {
 1966             $this->autoFinalize();
 1967         }
 1968         $full = $this->getAll();
 1969         if (!isset($full[$namespace])) {
 1970             $this->triggerError(
 1971                 'Cannot retrieve undefined namespace ' .
 1972                 htmlspecialchars($namespace),
 1973                 E_USER_WARNING
 1974             );
 1975             return;
 1976         }
 1977         return $full[$namespace];
 1978     }
 1979 
 1980     /**
 1981      * Returns a SHA-1 signature of a segment of the configuration object
 1982      * that uniquely identifies that particular configuration
 1983      *
 1984      * @param string $namespace Namespace to get serial for
 1985      *
 1986      * @return string
 1987      * @note Revision is handled specially and is removed from the batch
 1988      *       before processing!
 1989      */
 1990     public function getBatchSerial($namespace)
 1991     {
 1992         if (empty($this->serials[$namespace])) {
 1993             $batch = $this->getBatch($namespace);
 1994             unset($batch['DefinitionRev']);
 1995             $this->serials[$namespace] = sha1(serialize($batch));
 1996         }
 1997         return $this->serials[$namespace];
 1998     }
 1999 
 2000     /**
 2001      * Returns a SHA-1 signature for the entire configuration object
 2002      * that uniquely identifies that particular configuration
 2003      *
 2004      * @return string
 2005      */
 2006     public function getSerial()
 2007     {
 2008         if (empty($this->serial)) {
 2009             $this->serial = sha1(serialize($this->getAll()));
 2010         }
 2011         return $this->serial;
 2012     }
 2013 
 2014     /**
 2015      * Retrieves all directives, organized by namespace
 2016      *
 2017      * @warning This is a pretty inefficient function, avoid if you can
 2018      */
 2019     public function getAll()
 2020     {
 2021         if (!$this->finalized) {
 2022             $this->autoFinalize();
 2023         }
 2024         $ret = array();
 2025         foreach ($this->plist->squash() as $name => $value) {
 2026             list($ns, $key) = explode('.', $name, 2);
 2027             $ret[$ns][$key] = $value;
 2028         }
 2029         return $ret;
 2030     }
 2031 
 2032     /**
 2033      * Sets a value to configuration.
 2034      *
 2035      * @param string $key key
 2036      * @param mixed $value value
 2037      * @param mixed $a
 2038      */
 2039     public function set($key, $value, $a = null)
 2040     {
 2041         if (strpos($key, '.') === false) {
 2042             $namespace = $key;
 2043             $directive = $value;
 2044             $value = $a;
 2045             $key = "$key.$directive";
 2046             $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
 2047         } else {
 2048             list($namespace) = explode('.', $key);
 2049         }
 2050         if ($this->isFinalized('Cannot set directive after finalization')) {
 2051             return;
 2052         }
 2053         if (!isset($this->def->info[$key])) {
 2054             $this->triggerError(
 2055                 'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
 2056                 E_USER_WARNING
 2057             );
 2058             return;
 2059         }
 2060         $def = $this->def->info[$key];
 2061 
 2062         if (isset($def->isAlias)) {
 2063             if ($this->aliasMode) {
 2064                 $this->triggerError(
 2065                     'Double-aliases not allowed, please fix '.
 2066                     'ConfigSchema bug with' . $key,
 2067                     E_USER_ERROR
 2068                 );
 2069                 return;
 2070             }
 2071             $this->aliasMode = true;
 2072             $this->set($def->key, $value);
 2073             $this->aliasMode = false;
 2074             $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
 2075             return;
 2076         }
 2077 
 2078         // Raw type might be negative when using the fully optimized form
 2079         // of stdClass, which indicates allow_null == true
 2080         $rtype = is_int($def) ? $def : $def->type;
 2081         if ($rtype < 0) {
 2082             $type = -$rtype;
 2083             $allow_null = true;
 2084         } else {
 2085             $type = $rtype;
 2086             $allow_null = isset($def->allow_null);
 2087         }
 2088 
 2089         try {
 2090             $value = $this->parser->parse($value, $type, $allow_null);
 2091         } catch (HTMLPurifier_VarParserException $e) {
 2092             $this->triggerError(
 2093                 'Value for ' . $key . ' is of invalid type, should be ' .
 2094                 HTMLPurifier_VarParser::getTypeName($type),
 2095                 E_USER_WARNING
 2096             );
 2097             return;
 2098         }
 2099         if (is_string($value) && is_object($def)) {
 2100             // resolve value alias if defined
 2101             if (isset($def->aliases[$value])) {
 2102                 $value = $def->aliases[$value];
 2103             }
 2104             // check to see if the value is allowed
 2105             if (isset($def->allowed) && !isset($def->allowed[$value])) {
 2106                 $this->triggerError(
 2107                     'Value not supported, valid values are: ' .
 2108                     $this->_listify($def->allowed),
 2109                     E_USER_WARNING
 2110                 );
 2111                 return;
 2112             }
 2113         }
 2114         $this->plist->set($key, $value);
 2115 
 2116         // reset definitions if the directives they depend on changed
 2117         // this is a very costly process, so it's discouraged
 2118         // with finalization
 2119         if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
 2120             $this->definitions[$namespace] = null;
 2121         }
 2122 
 2123         $this->serials[$namespace] = false;
 2124     }
 2125 
 2126     /**
 2127      * Convenience function for error reporting
 2128      *
 2129      * @param array $lookup
 2130      *
 2131      * @return string
 2132      */
 2133     private function _listify($lookup)
 2134     {
 2135         $list = array();
 2136         foreach ($lookup as $name => $b) {
 2137             $list[] = $name;
 2138         }
 2139         return implode(', ', $list);
 2140     }
 2141 
 2142     /**
 2143      * Retrieves object reference to the HTML definition.
 2144      *
 2145      * @param bool $raw Return a copy that has not been setup yet. Must be
 2146      *             called before it's been setup, otherwise won't work.
 2147      * @param bool $optimized If true, this method may return null, to
 2148      *             indicate that a cached version of the modified
 2149      *             definition object is available and no further edits
 2150      *             are necessary.  Consider using
 2151      *             maybeGetRawHTMLDefinition, which is more explicitly
 2152      *             named, instead.
 2153      *
 2154      * @return HTMLPurifier_HTMLDefinition
 2155      */
 2156     public function getHTMLDefinition($raw = false, $optimized = false)
 2157     {
 2158         return $this->getDefinition('HTML', $raw, $optimized);
 2159     }
 2160 
 2161     /**
 2162      * Retrieves object reference to the CSS definition
 2163      *
 2164      * @param bool $raw Return a copy that has not been setup yet. Must be
 2165      *             called before it's been setup, otherwise won't work.
 2166      * @param bool $optimized If true, this method may return null, to
 2167      *             indicate that a cached version of the modified
 2168      *             definition object is available and no further edits
 2169      *             are necessary.  Consider using
 2170      *             maybeGetRawCSSDefinition, which is more explicitly
 2171      *             named, instead.
 2172      *
 2173      * @return HTMLPurifier_CSSDefinition
 2174      */
 2175     public function getCSSDefinition($raw = false, $optimized = false)
 2176     {
 2177         return $this->getDefinition('CSS', $raw, $optimized);
 2178     }
 2179 
 2180     /**
 2181      * Retrieves object reference to the URI definition
 2182      *
 2183      * @param bool $raw Return a copy that has not been setup yet. Must be
 2184      *             called before it's been setup, otherwise won't work.
 2185      * @param bool $optimized If true, this method may return null, to
 2186      *             indicate that a cached version of the modified
 2187      *             definition object is available and no further edits
 2188      *             are necessary.  Consider using
 2189      *             maybeGetRawURIDefinition, which is more explicitly
 2190      *             named, instead.
 2191      *
 2192      * @return HTMLPurifier_URIDefinition
 2193      */
 2194     public function getURIDefinition($raw = false, $optimized = false)
 2195     {
 2196         return $this->getDefinition('URI', $raw, $optimized);
 2197     }
 2198 
 2199     /**
 2200      * Retrieves a definition
 2201      *
 2202      * @param string $type Type of definition: HTML, CSS, etc
 2203      * @param bool $raw Whether or not definition should be returned raw
 2204      * @param bool $optimized Only has an effect when $raw is true.  Whether
 2205      *        or not to return null if the result is already present in
 2206      *        the cache.  This is off by default for backwards
 2207      *        compatibility reasons, but you need to do things this
 2208      *        way in order to ensure that caching is done properly.
 2209      *        Check out enduser-customize.html for more details.
 2210      *        We probably won't ever change this default, as much as the
 2211      *        maybe semantics is the "right thing to do."
 2212      *
 2213      * @throws HTMLPurifier_Exception
 2214      * @return HTMLPurifier_Definition
 2215      */
 2216     public function getDefinition($type, $raw = false, $optimized = false)
 2217     {
 2218         if ($optimized && !$raw) {
 2219             throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
 2220         }
 2221         if (!$this->finalized) {
 2222             $this->autoFinalize();
 2223         }
 2224         // temporarily suspend locks, so we can handle recursive definition calls
 2225         $lock = $this->lock;
 2226         $this->lock = null;
 2227         $factory = HTMLPurifier_DefinitionCacheFactory::instance();
 2228         $cache = $factory->create($type, $this);
 2229         $this->lock = $lock;
 2230         if (!$raw) {
 2231             // full definition
 2232             // ---------------
 2233             // check if definition is in memory
 2234             if (!empty($this->definitions[$type])) {
 2235                 $def = $this->definitions[$type];
 2236                 // check if the definition is setup
 2237                 if ($def->setup) {
 2238                     return $def;
 2239                 } else {
 2240                     $def->setup($this);
 2241                     if ($def->optimized) {
 2242                         $cache->add($def, $this);
 2243                     }
 2244                     return $def;
 2245                 }
 2246             }
 2247             // check if definition is in cache
 2248             $def = $cache->get($this);
 2249             if ($def) {
 2250                 // definition in cache, save to memory and return it
 2251                 $this->definitions[$type] = $def;
 2252                 return $def;
 2253             }
 2254             // initialize it
 2255             $def = $this->initDefinition($type);
 2256             // set it up
 2257             $this->lock = $type;
 2258             $def->setup($this);
 2259             $this->lock = null;
 2260             // save in cache
 2261             $cache->add($def, $this);
 2262             // return it
 2263             return $def;
 2264         } else {
 2265             // raw definition
 2266             // --------------
 2267             // check preconditions
 2268             $def = null;
 2269             if ($optimized) {
 2270                 if (is_null($this->get($type . '.DefinitionID'))) {
 2271                     // fatally error out if definition ID not set
 2272                     throw new HTMLPurifier_Exception(
 2273                         "Cannot retrieve raw version without specifying %$type.DefinitionID"
 2274                     );
 2275                 }
 2276             }
 2277             if (!empty($this->definitions[$type])) {
 2278                 $def = $this->definitions[$type];
 2279                 if ($def->setup && !$optimized) {
 2280                     $extra = $this->chatty ?
 2281                         " (try moving this code block earlier in your initialization)" :
 2282                         "";
 2283                     throw new HTMLPurifier_Exception(
 2284                         "Cannot retrieve raw definition after it has already been setup" .
 2285                         $extra
 2286                     );
 2287                 }
 2288                 if ($def->optimized === null) {
 2289                     $extra = $this->chatty ? " (try flushing your cache)" : "";
 2290                     throw new HTMLPurifier_Exception(
 2291                         "Optimization status of definition is unknown" . $extra
 2292                     );
 2293                 }
 2294                 if ($def->optimized !== $optimized) {
 2295                     $msg = $optimized ? "optimized" : "unoptimized";
 2296                     $extra = $this->chatty ?
 2297                         " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
 2298                         : "";
 2299                     throw new HTMLPurifier_Exception(
 2300                         "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
 2301                     );
 2302                 }
 2303             }
 2304             // check if definition was in memory
 2305             if ($def) {
 2306                 if ($def->setup) {
 2307                     // invariant: $optimized === true (checked above)
 2308                     return null;
 2309                 } else {
 2310                     return $def;
 2311                 }
 2312             }
 2313             // if optimized, check if definition was in cache
 2314             // (because we do the memory check first, this formulation
 2315             // is prone to cache slamming, but I think
 2316             // guaranteeing that either /all/ of the raw
 2317             // setup code or /none/ of it is run is more important.)
 2318             if ($optimized) {
 2319                 // This code path only gets run once; once we put
 2320                 // something in $definitions (which is guaranteed by the
 2321                 // trailing code), we always short-circuit above.
 2322                 $def = $cache->get($this);
 2323                 if ($def) {
 2324                     // save the full definition for later, but don't
 2325                     // return it yet
 2326                     $this->definitions[$type] = $def;
 2327                     return null;
 2328                 }
 2329             }
 2330             // check invariants for creation
 2331             if (!$optimized) {
 2332                 if (!is_null($this->get($type . '.DefinitionID'))) {
 2333                     if ($this->chatty) {
 2334                         $this->triggerError(
 2335                             'Due to a documentation error in previous version of HTML Purifier, your ' .
 2336                             'definitions are not being cached.  If this is OK, you can remove the ' .
 2337                             '%$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, ' .
 2338                             'modify your code to use maybeGetRawDefinition, and test if the returned ' .
 2339                             'value is null before making any edits (if it is null, that means that a ' .
 2340                             'cached version is available, and no raw operations are necessary).  See ' .
 2341                             '<a href="http://htmlpurifier.org/docs/enduser-customize.html#optimized">' .
 2342                             'Customize</a> for more details',
 2343                             E_USER_WARNING
 2344                         );
 2345                     } else {
 2346                         $this->triggerError(
 2347                             "Useless DefinitionID declaration",
 2348                             E_USER_WARNING
 2349                         );
 2350                     }
 2351                 }
 2352             }
 2353             // initialize it
 2354             $def = $this->initDefinition($type);
 2355             $def->optimized = $optimized;
 2356             return $def;
 2357         }
 2358         throw new HTMLPurifier_Exception("The impossible happened!");
 2359     }
 2360 
 2361     /**
 2362      * Initialise definition
 2363      *
 2364      * @param string $type What type of definition to create
 2365      *
 2366      * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
 2367      * @throws HTMLPurifier_Exception
 2368      */
 2369     private function initDefinition($type)
 2370     {
 2371         // quick checks failed, let's create the object
 2372         if ($type == 'HTML') {
 2373             $def = new HTMLPurifier_HTMLDefinition();
 2374         } elseif ($type == 'CSS') {
 2375             $def = new HTMLPurifier_CSSDefinition();
 2376         } elseif ($type == 'URI') {
 2377             $def = new HTMLPurifier_URIDefinition();
 2378         } else {
 2379             throw new HTMLPurifier_Exception(
 2380                 "Definition of $type type not supported"
 2381             );
 2382         }
 2383         $this->definitions[$type] = $def;
 2384         return $def;
 2385     }
 2386 
 2387     public function maybeGetRawDefinition($name)
 2388     {
 2389         return $this->getDefinition($name, true, true);
 2390     }
 2391 
 2392     /**
 2393      * @return HTMLPurifier_HTMLDefinition
 2394      */
 2395     public function maybeGetRawHTMLDefinition()
 2396     {
 2397         return $this->getDefinition('HTML', true, true);
 2398     }
 2399 
 2400     /**
 2401      * @return HTMLPurifier_CSSDefinition
 2402      */
 2403     public function maybeGetRawCSSDefinition()
 2404     {
 2405         return $this->getDefinition('CSS', true, true);
 2406     }
 2407 
 2408     /**
 2409      * @return HTMLPurifier_URIDefinition
 2410      */
 2411     public function maybeGetRawURIDefinition()
 2412     {
 2413         return $this->getDefinition('URI', true, true);
 2414     }
 2415 
 2416     /**
 2417      * Loads configuration values from an array with the following structure:
 2418      * Namespace.Directive => Value
 2419      *
 2420      * @param array $config_array Configuration associative array
 2421      */
 2422     public function loadArray($config_array)
 2423     {
 2424         if ($this->isFinalized('Cannot load directives after finalization')) {
 2425             return;
 2426         }
 2427         foreach ($config_array as $key => $value) {
 2428             $key = str_replace('_', '.', $key);
 2429             if (strpos($key, '.') !== false) {
 2430                 $this->set($key, $value);
 2431             } else {
 2432                 $namespace = $key;
 2433                 $namespace_values = $value;
 2434                 foreach ($namespace_values as $directive => $value2) {
 2435                     $this->set($namespace .'.'. $directive, $value2);
 2436                 }
 2437             }
 2438         }
 2439     }
 2440 
 2441     /**
 2442      * Returns a list of array(namespace, directive) for all directives
 2443      * that are allowed in a web-form context as per an allowed
 2444      * namespaces/directives list.
 2445      *
 2446      * @param array $allowed List of allowed namespaces/directives
 2447      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2448      *
 2449      * @return array
 2450      */
 2451     public static function getAllowedDirectivesForForm($allowed, $schema = null)
 2452     {
 2453         if (!$schema) {
 2454             $schema = HTMLPurifier_ConfigSchema::instance();
 2455         }
 2456         if ($allowed !== true) {
 2457             if (is_string($allowed)) {
 2458                 $allowed = array($allowed);
 2459             }
 2460             $allowed_ns = array();
 2461             $allowed_directives = array();
 2462             $blacklisted_directives = array();
 2463             foreach ($allowed as $ns_or_directive) {
 2464                 if (strpos($ns_or_directive, '.') !== false) {
 2465                     // directive
 2466                     if ($ns_or_directive[0] == '-') {
 2467                         $blacklisted_directives[substr($ns_or_directive, 1)] = true;
 2468                     } else {
 2469                         $allowed_directives[$ns_or_directive] = true;
 2470                     }
 2471                 } else {
 2472                     // namespace
 2473                     $allowed_ns[$ns_or_directive] = true;
 2474                 }
 2475             }
 2476         }
 2477         $ret = array();
 2478         foreach ($schema->info as $key => $def) {
 2479             list($ns, $directive) = explode('.', $key, 2);
 2480             if ($allowed !== true) {
 2481                 if (isset($blacklisted_directives["$ns.$directive"])) {
 2482                     continue;
 2483                 }
 2484                 if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) {
 2485                     continue;
 2486                 }
 2487             }
 2488             if (isset($def->isAlias)) {
 2489                 continue;
 2490             }
 2491             if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
 2492                 continue;
 2493             }
 2494             $ret[] = array($ns, $directive);
 2495         }
 2496         return $ret;
 2497     }
 2498 
 2499     /**
 2500      * Loads configuration values from $_GET/$_POST that were posted
 2501      * via ConfigForm
 2502      *
 2503      * @param array $array $_GET or $_POST array to import
 2504      * @param string|bool $index Index/name that the config variables are in
 2505      * @param array|bool $allowed List of allowed namespaces/directives
 2506      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2507      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2508      *
 2509      * @return mixed
 2510      */
 2511     public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
 2512     {
 2513         $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
 2514         $config = HTMLPurifier_Config::create($ret, $schema);
 2515         return $config;
 2516     }
 2517 
 2518     /**
 2519      * Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
 2520      *
 2521      * @param array $array $_GET or $_POST array to import
 2522      * @param string|bool $index Index/name that the config variables are in
 2523      * @param array|bool $allowed List of allowed namespaces/directives
 2524      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2525      */
 2526     public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
 2527     {
 2528         $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
 2529         $this->loadArray($ret);
 2530     }
 2531 
 2532     /**
 2533      * Prepares an array from a form into something usable for the more
 2534      * strict parts of HTMLPurifier_Config
 2535      *
 2536      * @param array $array $_GET or $_POST array to import
 2537      * @param string|bool $index Index/name that the config variables are in
 2538      * @param array|bool $allowed List of allowed namespaces/directives
 2539      * @param bool $mq_fix Boolean whether or not to enable magic quotes fix
 2540      * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 2541      *
 2542      * @return array
 2543      */
 2544     public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
 2545     {
 2546         if ($index !== false) {
 2547             $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
 2548         }
 2549         $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
 2550 
 2551         $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
 2552         $ret = array();
 2553         foreach ($allowed as $key) {
 2554             list($ns, $directive) = $key;
 2555             $skey = "$ns.$directive";
 2556             if (!empty($array["Null_$skey"])) {
 2557                 $ret[$ns][$directive] = null;
 2558                 continue;
 2559             }
 2560             if (!isset($array[$skey])) {
 2561                 continue;
 2562             }
 2563             $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
 2564             $ret[$ns][$directive] = $value;
 2565         }
 2566         return $ret;
 2567     }
 2568 
 2569     /**
 2570      * Loads configuration values from an ini file
 2571      *
 2572      * @param string $filename Name of ini file
 2573      */
 2574     public function loadIni($filename)
 2575     {
 2576         if ($this->isFinalized('Cannot load directives after finalization')) {
 2577             return;
 2578         }
 2579         $array = parse_ini_file($filename, true);
 2580         $this->loadArray($array);
 2581     }
 2582 
 2583     /**
 2584      * Checks whether or not the configuration object is finalized.
 2585      *
 2586      * @param string|bool $error String error message, or false for no error
 2587      *
 2588      * @return bool
 2589      */
 2590     public function isFinalized($error = false)
 2591     {
 2592         if ($this->finalized && $error) {
 2593             $this->triggerError($error, E_USER_ERROR);
 2594         }
 2595         return $this->finalized;
 2596     }
 2597 
 2598     /**
 2599      * Finalizes configuration only if auto finalize is on and not
 2600      * already finalized
 2601      */
 2602     public function autoFinalize()
 2603     {
 2604         if ($this->autoFinalize) {
 2605             $this->finalize();
 2606         } else {
 2607             $this->plist->squash(true);
 2608         }
 2609     }
 2610 
 2611     /**
 2612      * Finalizes a configuration object, prohibiting further change
 2613      */
 2614     public function finalize()
 2615     {
 2616         $this->finalized = true;
 2617         $this->parser = null;
 2618     }
 2619 
 2620     /**
 2621      * Produces a nicely formatted error message by supplying the
 2622      * stack frame information OUTSIDE of HTMLPurifier_Config.
 2623      *
 2624      * @param string $msg An error message
 2625      * @param int $no An error number
 2626      */
 2627     protected function triggerError($msg, $no)
 2628     {
 2629         // determine previous stack frame
 2630         $extra = '';
 2631         if ($this->chatty) {
 2632             $trace = debug_backtrace();
 2633             // zip(tail(trace), trace) -- but PHP is not Haskell har har
 2634             for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
 2635                 // XXX this is not correct on some versions of HTML Purifier
 2636                 if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
 2637                     continue;
 2638                 }
 2639                 $frame = $trace[$i];
 2640                 $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
 2641                 break;
 2642             }
 2643         }
 2644         trigger_error($msg . $extra, $no);
 2645     }
 2646 
 2647     /**
 2648      * Returns a serialized form of the configuration object that can
 2649      * be reconstituted.
 2650      *
 2651      * @return string
 2652      */
 2653     public function serialize()
 2654     {
 2655         $this->getDefinition('HTML');
 2656         $this->getDefinition('CSS');
 2657         $this->getDefinition('URI');
 2658         return serialize($this);
 2659     }
 2660 
 2661 }
 2662 
 2663 
 2664 
 2665 
 2666 
 2667 /**
 2668  * Configuration definition, defines directives and their defaults.
 2669  */
 2670 class HTMLPurifier_ConfigSchema
 2671 {
 2672     /**
 2673      * Defaults of the directives and namespaces.
 2674      * @type array
 2675      * @note This shares the exact same structure as HTMLPurifier_Config::$conf
 2676      */
 2677     public $defaults = array();
 2678 
 2679     /**
 2680      * The default property list. Do not edit this property list.
 2681      * @type array
 2682      */
 2683     public $defaultPlist;
 2684 
 2685     /**
 2686      * Definition of the directives.
 2687      * The structure of this is:
 2688      *
 2689      *  array(
 2690      *      'Namespace' => array(
 2691      *          'Directive' => new stdClass(),
 2692      *      )
 2693      *  )
 2694      *
 2695      * The stdClass may have the following properties:
 2696      *
 2697      *  - If isAlias isn't set:
 2698      *      - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
 2699      *      - allow_null: If set, this directive allows null values
 2700      *      - aliases: If set, an associative array of value aliases to real values
 2701      *      - allowed: If set, a lookup array of allowed (string) values
 2702      *  - If isAlias is set:
 2703      *      - namespace: Namespace this directive aliases to
 2704      *      - name: Directive name this directive aliases to
 2705      *
 2706      * In certain degenerate cases, stdClass will actually be an integer. In
 2707      * that case, the value is equivalent to an stdClass with the type
 2708      * property set to the integer. If the integer is negative, type is
 2709      * equal to the absolute value of integer, and allow_null is true.
 2710      *
 2711      * This class is friendly with HTMLPurifier_Config. If you need introspection
 2712      * about the schema, you're better of using the ConfigSchema_Interchange,
 2713      * which uses more memory but has much richer information.
 2714      * @type array
 2715      */
 2716     public $info = array();
 2717 
 2718     /**
 2719      * Application-wide singleton
 2720      * @type HTMLPurifier_ConfigSchema
 2721      */
 2722     protected static $singleton;
 2723 
 2724     public function __construct()
 2725     {
 2726         $this->defaultPlist = new HTMLPurifier_PropertyList();
 2727     }
 2728 
 2729     /**
 2730      * Unserializes the default ConfigSchema.
 2731      * @return HTMLPurifier_ConfigSchema
 2732      */
 2733     public static function makeFromSerial()
 2734     {
 2735         $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
 2736         $r = unserialize($contents);
 2737         if (!$r) {
 2738             $hash = sha1($contents);
 2739             trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
 2740         }
 2741         return $r;
 2742     }
 2743 
 2744     /**
 2745      * Retrieves an instance of the application-wide configuration definition.
 2746      * @param HTMLPurifier_ConfigSchema $prototype
 2747      * @return HTMLPurifier_ConfigSchema
 2748      */
 2749     public static function instance($prototype = null)
 2750     {
 2751         if ($prototype !== null) {
 2752             HTMLPurifier_ConfigSchema::$singleton = $prototype;
 2753         } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
 2754             HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
 2755         }
 2756         return HTMLPurifier_ConfigSchema::$singleton;
 2757     }
 2758 
 2759     /**
 2760      * Defines a directive for configuration
 2761      * @warning Will fail of directive's namespace is defined.
 2762      * @warning This method's signature is slightly different from the legacy
 2763      *          define() static method! Beware!
 2764      * @param string $key Name of directive
 2765      * @param mixed $default Default value of directive
 2766      * @param string $type Allowed type of the directive. See
 2767      *      HTMLPurifier_DirectiveDef::$type for allowed values
 2768      * @param bool $allow_null Whether or not to allow null values
 2769      */
 2770     public function add($key, $default, $type, $allow_null)
 2771     {
 2772         $obj = new stdClass();
 2773         $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
 2774         if ($allow_null) {
 2775             $obj->allow_null = true;
 2776         }
 2777         $this->info[$key] = $obj;
 2778         $this->defaults[$key] = $default;
 2779         $this->defaultPlist->set($key, $default);
 2780     }
 2781 
 2782     /**
 2783      * Defines a directive value alias.
 2784      *
 2785      * Directive value aliases are convenient for developers because it lets
 2786      * them set a directive to several values and get the same result.
 2787      * @param string $key Name of Directive
 2788      * @param array $aliases Hash of aliased values to the real alias
 2789      */
 2790     public function addValueAliases($key, $aliases)
 2791     {
 2792         if (!isset($this->info[$key]->aliases)) {
 2793             $this->info[$key]->aliases = array();
 2794         }
 2795         foreach ($aliases as $alias => $real) {
 2796             $this->info[$key]->aliases[$alias] = $real;
 2797         }
 2798     }
 2799 
 2800     /**
 2801      * Defines a set of allowed values for a directive.
 2802      * @warning This is slightly different from the corresponding static
 2803      *          method definition.
 2804      * @param string $key Name of directive
 2805      * @param array $allowed Lookup array of allowed values
 2806      */
 2807     public function addAllowedValues($key, $allowed)
 2808     {
 2809         $this->info[$key]->allowed = $allowed;
 2810     }
 2811 
 2812     /**
 2813      * Defines a directive alias for backwards compatibility
 2814      * @param string $key Directive that will be aliased
 2815      * @param string $new_key Directive that the alias will be to
 2816      */
 2817     public function addAlias($key, $new_key)
 2818     {
 2819         $obj = new stdClass;
 2820         $obj->key = $new_key;
 2821         $obj->isAlias = true;
 2822         $this->info[$key] = $obj;
 2823     }
 2824 
 2825     /**
 2826      * Replaces any stdClass that only has the type property with type integer.
 2827      */
 2828     public function postProcess()
 2829     {
 2830         foreach ($this->info as $key => $v) {
 2831             if (count((array) $v) == 1) {
 2832                 $this->info[$key] = $v->type;
 2833             } elseif (count((array) $v) == 2 && isset($v->allow_null)) {
 2834                 $this->info[$key] = -$v->type;
 2835             }
 2836         }
 2837     }
 2838 }
 2839 
 2840 
 2841 
 2842 
 2843 
 2844 /**
 2845  * @todo Unit test
 2846  */
 2847 class HTMLPurifier_ContentSets
 2848 {
 2849 
 2850     /**
 2851      * List of content set strings (pipe separators) indexed by name.
 2852      * @type array
 2853      */
 2854     public $info = array();
 2855 
 2856     /**
 2857      * List of content set lookups (element => true) indexed by name.
 2858      * @type array
 2859      * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
 2860      */
 2861     public $lookup = array();
 2862 
 2863     /**
 2864      * Synchronized list of defined content sets (keys of info).
 2865      * @type array
 2866      */
 2867     protected $keys = array();
 2868     /**
 2869      * Synchronized list of defined content values (values of info).
 2870      * @type array
 2871      */
 2872     protected $values = array();
 2873 
 2874     /**
 2875      * Merges in module's content sets, expands identifiers in the content
 2876      * sets and populates the keys, values and lookup member variables.
 2877      * @param HTMLPurifier_HTMLModule[] $modules List of HTMLPurifier_HTMLModule
 2878      */
 2879     public function __construct($modules)
 2880     {
 2881         if (!is_array($modules)) {
 2882             $modules = array($modules);
 2883         }
 2884         // populate content_sets based on module hints
 2885         // sorry, no way of overloading
 2886         foreach ($modules as $module) {
 2887             foreach ($module->content_sets as $key => $value) {
 2888                 $temp = $this->convertToLookup($value);
 2889                 if (isset($this->lookup[$key])) {
 2890                     // add it into the existing content set
 2891                     $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
 2892                 } else {
 2893                     $this->lookup[$key] = $temp;
 2894                 }
 2895             }
 2896         }
 2897         $old_lookup = false;
 2898         while ($old_lookup !== $this->lookup) {
 2899             $old_lookup = $this->lookup;
 2900             foreach ($this->lookup as $i => $set) {
 2901                 $add = array();
 2902                 foreach ($set as $element => $x) {
 2903                     if (isset($this->lookup[$element])) {
 2904                         $add += $this->lookup[$element];
 2905                         unset($this->lookup[$i][$element]);
 2906                     }
 2907                 }
 2908                 $this->lookup[$i] += $add;
 2909             }
 2910         }
 2911 
 2912         foreach ($this->lookup as $key => $lookup) {
 2913             $this->info[$key] = implode(' | ', array_keys($lookup));
 2914         }
 2915         $this->keys   = array_keys($this->info);
 2916         $this->values = array_values($this->info);
 2917     }
 2918 
 2919     /**
 2920      * Accepts a definition; generates and assigns a ChildDef for it
 2921      * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef reference
 2922      * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
 2923      */
 2924     public function generateChildDef(&$def, $module)
 2925     {
 2926         if (!empty($def->child)) { // already done!
 2927             return;
 2928         }
 2929         $content_model = $def->content_model;
 2930         if (is_string($content_model)) {
 2931             // Assume that $this->keys is alphanumeric
 2932             $def->content_model = preg_replace_callback(
 2933                 '/\b(' . implode('|', $this->keys) . ')\b/',
 2934                 array($this, 'generateChildDefCallback'),
 2935                 $content_model
 2936             );
 2937             //$def->content_model = str_replace(
 2938             //    $this->keys, $this->values, $content_model);
 2939         }
 2940         $def->child = $this->getChildDef($def, $module);
 2941     }
 2942 
 2943     public function generateChildDefCallback($matches)
 2944     {
 2945         return $this->info[$matches[0]];
 2946     }
 2947 
 2948     /**
 2949      * Instantiates a ChildDef based on content_model and content_model_type
 2950      * member variables in HTMLPurifier_ElementDef
 2951      * @note This will also defer to modules for custom HTMLPurifier_ChildDef
 2952      *       subclasses that need content set expansion
 2953      * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef to have ChildDef extracted
 2954      * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
 2955      * @return HTMLPurifier_ChildDef corresponding to ElementDef
 2956      */
 2957     public function getChildDef($def, $module)
 2958     {
 2959         $value = $def->content_model;
 2960         if (is_object($value)) {
 2961             trigger_error(
 2962                 'Literal object child definitions should be stored in '.
 2963                 'ElementDef->child not ElementDef->content_model',
 2964                 E_USER_NOTICE
 2965             );
 2966             return $value;
 2967         }
 2968         switch ($def->content_model_type) {
 2969             case 'required':
 2970                 return new HTMLPurifier_ChildDef_Required($value);
 2971             case 'optional':
 2972                 return new HTMLPurifier_ChildDef_Optional($value);
 2973             case 'empty':
 2974                 return new HTMLPurifier_ChildDef_Empty();
 2975             case 'custom':
 2976                 return new HTMLPurifier_ChildDef_Custom($value);
 2977         }
 2978         // defer to its module
 2979         $return = false;
 2980         if ($module->defines_child_def) { // save a func call
 2981             $return = $module->getChildDef($def);
 2982         }
 2983         if ($return !== false) {
 2984             return $return;
 2985         }
 2986         // error-out
 2987         trigger_error(
 2988             'Could not determine which ChildDef class to instantiate',
 2989             E_USER_ERROR
 2990         );
 2991         return false;
 2992     }
 2993 
 2994     /**
 2995      * Converts a string list of elements separated by pipes into
 2996      * a lookup array.
 2997      * @param string $string List of elements
 2998      * @return array Lookup array of elements
 2999      */
 3000     protected function convertToLookup($string)
 3001     {
 3002         $array = explode('|', str_replace(' ', '', $string));
 3003         $ret = array();
 3004         foreach ($array as $k) {
 3005             $ret[$k] = true;
 3006         }
 3007         return $ret;
 3008     }
 3009 }
 3010 
 3011 
 3012 
 3013 
 3014 
 3015 /**
 3016  * Registry object that contains information about the current context.
 3017  * @warning Is a bit buggy when variables are set to null: it thinks
 3018  *          they don't exist! So use false instead, please.
 3019  * @note Since the variables Context deals with may not be objects,
 3020  *       references are very important here! Do not remove!
 3021  */
 3022 class HTMLPurifier_Context
 3023 {
 3024 
 3025     /**
 3026      * Private array that stores the references.
 3027      * @type array
 3028      */
 3029     private $_storage = array();
 3030 
 3031     /**
 3032      * Registers a variable into the context.
 3033      * @param string $name String name
 3034      * @param mixed $ref Reference to variable to be registered
 3035      */
 3036     public function register($name, &$ref)
 3037     {
 3038         if (array_key_exists($name, $this->_storage)) {
 3039             trigger_error(
 3040                 "Name $name produces collision, cannot re-register",
 3041                 E_USER_ERROR
 3042             );
 3043             return;
 3044         }
 3045         $this->_storage[$name] =& $ref;
 3046     }
 3047 
 3048     /**
 3049      * Retrieves a variable reference from the context.
 3050      * @param string $name String name
 3051      * @param bool $ignore_error Boolean whether or not to ignore error
 3052      * @return mixed
 3053      */
 3054     public function &get($name, $ignore_error = false)
 3055     {
 3056         if (!array_key_exists($name, $this->_storage)) {
 3057             if (!$ignore_error) {
 3058                 trigger_error(
 3059                     "Attempted to retrieve non-existent variable $name",
 3060                     E_USER_ERROR
 3061                 );
 3062             }
 3063             $var = null; // so we can return by reference
 3064             return $var;
 3065         }
 3066         return $this->_storage[$name];
 3067     }
 3068 
 3069     /**
 3070      * Destroys a variable in the context.
 3071      * @param string $name String name
 3072      */
 3073     public function destroy($name)
 3074     {
 3075         if (!array_key_exists($name, $this->_storage)) {
 3076             trigger_error(
 3077                 "Attempted to destroy non-existent variable $name",
 3078                 E_USER_ERROR
 3079             );
 3080             return;
 3081         }
 3082         unset($this->_storage[$name]);
 3083     }
 3084 
 3085     /**
 3086      * Checks whether or not the variable exists.
 3087      * @param string $name String name
 3088      * @return bool
 3089      */
 3090     public function exists($name)
 3091     {
 3092         return array_key_exists($name, $this->_storage);
 3093     }
 3094 
 3095     /**
 3096      * Loads a series of variables from an associative array
 3097      * @param array $context_array Assoc array of variables to load
 3098      */
 3099     public function loadArray($context_array)
 3100     {
 3101         foreach ($context_array as $key => $discard) {
 3102             $this->register($key, $context_array[$key]);
 3103         }
 3104     }
 3105 }
 3106 
 3107 
 3108 
 3109 
 3110 
 3111 /**
 3112  * Abstract class representing Definition cache managers that implements
 3113  * useful common methods and is a factory.
 3114  * @todo Create a separate maintenance file advanced users can use to
 3115  *       cache their custom HTMLDefinition, which can be loaded
 3116  *       via a configuration directive
 3117  * @todo Implement memcached
 3118  */
 3119 abstract class HTMLPurifier_DefinitionCache
 3120 {
 3121     /**
 3122      * @type string
 3123      */
 3124     public $type;
 3125 
 3126     /**
 3127      * @param string $type Type of definition objects this instance of the
 3128      *      cache will handle.
 3129      */
 3130     public function __construct($type)
 3131     {
 3132         $this->type = $type;
 3133     }
 3134 
 3135     /**
 3136      * Generates a unique identifier for a particular configuration
 3137      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
 3138      * @return string
 3139      */
 3140     public function generateKey($config)
 3141     {
 3142         return $config->version . ',' . // possibly replace with function calls
 3143             $config->getBatchSerial($this->type) . ',' .
 3144             $config->get($this->type . '.DefinitionRev');
 3145     }
 3146 
 3147     /**
 3148      * Tests whether or not a key is old with respect to the configuration's
 3149      * version and revision number.
 3150      * @param string $key Key to test
 3151      * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against
 3152      * @return bool
 3153      */
 3154     public function isOld($key, $config)
 3155     {
 3156         if (substr_count($key, ',') < 2) {
 3157             return true;
 3158         }
 3159         list($version, $hash, $revision) = explode(',', $key, 3);
 3160         $compare = version_compare($version, $config->version);
 3161         // version mismatch, is always old
 3162         if ($compare != 0) {
 3163             return true;
 3164         }
 3165         // versions match, ids match, check revision number
 3166         if ($hash == $config->getBatchSerial($this->type) &&
 3167             $revision < $config->get($this->type . '.DefinitionRev')) {
 3168             return true;
 3169         }
 3170         return false;
 3171     }
 3172 
 3173     /**
 3174      * Checks if a definition's type jives with the cache's type
 3175      * @note Throws an error on failure
 3176      * @param HTMLPurifier_Definition $def Definition object to check
 3177      * @return bool true if good, false if not
 3178      */
 3179     public function checkDefType($def)
 3180     {
 3181         if ($def->type !== $this->type) {
 3182             trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
 3183             return false;
 3184         }
 3185         return true;
 3186     }
 3187 
 3188     /**
 3189      * Adds a definition object to the cache
 3190      * @param HTMLPurifier_Definition $def
 3191      * @param HTMLPurifier_Config $config
 3192      */
 3193     abstract public function add($def, $config);
 3194 
    /**
     * Unconditionally saves a definition object to the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @param HTMLPurifier_Definition $def Definition object to save
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function set($def, $config);
 3201 
    /**
     * Replaces an object in the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @param HTMLPurifier_Definition $def Definition object to store as the replacement
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function replace($def, $config);
 3208 
    /**
     * Retrieves a definition object from the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function get($config);
 3214 
    /**
     * Removes a definition object from the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function remove($config);
 3220 
    /**
     * Clears all objects from the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function flush($config);
 3226 
    /**
     * Clears all expired (older version or revision) objects from the cache.
     *
     * Abstract: every concrete cache class must provide the implementation.
     *
     * @note Be careful implementing this method as flush. Flush must
     *       not interfere with other Definition types, and cleanup()
     *       should not be repeatedly called by userland code.
     * @param HTMLPurifier_Config $config Config instance handed to the implementation
     */
    abstract public function cleanup($config);
 3235 }
 3236 
 3237 
 3238 
 3239 
 3240 
 3241 /**
 3242  * Responsible for creating definition caches.
 3243  */
 3244 class HTMLPurifier_DefinitionCacheFactory
 3245 {
 3246     /**
 3247      * @type array
 3248      */
 3249     protected $caches = array('Serializer' => array());
 3250 
 3251     /**
 3252      * @type array
 3253      */
 3254     protected $implementations = array();
 3255 
 3256     /**
 3257      * @type HTMLPurifier_DefinitionCache_Decorator[]
 3258      */
 3259     protected $decorators = array();
 3260 
 3261     /**
 3262      * Initialize default decorators
 3263      */
 3264     public function setup()
 3265     {
 3266         $this->addDecorator('Cleanup');
 3267     }
 3268 
 3269     /**
 3270      * Retrieves an instance of global definition cache factory.
 3271      * @param HTMLPurifier_DefinitionCacheFactory $prototype
 3272      * @return HTMLPurifier_DefinitionCacheFactory
 3273      */
 3274     public static function instance($prototype = null)
 3275     {
 3276         static $instance;
 3277         if ($prototype !== null) {
 3278             $instance = $prototype;
 3279         } elseif ($instance === null || $prototype === true) {
 3280             $instance = new HTMLPurifier_DefinitionCacheFactory();
 3281             $instance->setup();
 3282         }
 3283         return $instance;
 3284     }
 3285 
 3286     /**
 3287      * Registers a new definition cache object
 3288      * @param string $short Short name of cache object, for reference
 3289      * @param string $long Full class name of cache object, for construction
 3290      */
 3291     public function register($short, $long)
 3292     {
 3293         $this->implementations[$short] = $long;
 3294     }
 3295 
 3296     /**
 3297      * Factory method that creates a cache object based on configuration
 3298      * @param string $type Name of definitions handled by cache
 3299      * @param HTMLPurifier_Config $config Config instance
 3300      * @return mixed
 3301      */
 3302     public function create($type, $config)
 3303     {
 3304         $method = $config->get('Cache.DefinitionImpl');
 3305         if ($method === null) {
 3306             return new HTMLPurifier_DefinitionCache_Null($type);
 3307         }
 3308         if (!empty($this->caches[$method][$type])) {
 3309             return $this->caches[$method][$type];
 3310         }
 3311         if (isset($this->implementations[$method]) &&
 3312             class_exists($class = $this->implementations[$method], false)) {
 3313             $cache = new $class($type);
 3314         } else {
 3315             if ($method != 'Serializer') {
 3316                 trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
 3317             }
 3318             $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
 3319         }
 3320         foreach ($this->decorators as $decorator) {
 3321             $new_cache = $decorator->decorate($cache);
 3322             // prevent infinite recursion in PHP 4
 3323             unset($cache);
 3324             $cache = $new_cache;
 3325         }
 3326         $this->caches[$method][$type] = $cache;
 3327         return $this->caches[$method][$type];
 3328     }
 3329 
 3330     /**
 3331      * Registers a decorator to add to all new cache objects
 3332      * @param HTMLPurifier_DefinitionCache_Decorator|string $decorator An instance or the name of a decorator
 3333      */
 3334     public function addDecorator($decorator)
 3335     {
 3336         if (is_string($decorator)) {
 3337             $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
 3338             $decorator = new $class;
 3339         }
 3340         $this->decorators[$decorator->name] = $decorator;
 3341     }
 3342 }
 3343 
 3344 
 3345 
 3346 
 3347 
 3348 /**
 3349  * Represents a document type, contains information on which modules
 3350  * need to be loaded.
 3351  * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
 3352  *       If structure changes, please update that function.
 3353  */
 3354 class HTMLPurifier_Doctype
 3355 {
 3356     /**
 3357      * Full name of doctype
 3358      * @type string
 3359      */
 3360     public $name;
 3361 
 3362     /**
 3363      * List of standard modules (string identifiers or literal objects)
 3364      * that this doctype uses
 3365      * @type array
 3366      */
 3367     public $modules = array();
 3368 
 3369     /**
 3370      * List of modules to use for tidying up code
 3371      * @type array
 3372      */
 3373     public $tidyModules = array();
 3374 
 3375     /**
 3376      * Is the language derived from XML (i.e. XHTML)?
 3377      * @type bool
 3378      */
 3379     public $xml = true;
 3380 
 3381     /**
 3382      * List of aliases for this doctype
 3383      * @type array
 3384      */
 3385     public $aliases = array();
 3386 
 3387     /**
 3388      * Public DTD identifier
 3389      * @type string
 3390      */
 3391     public $dtdPublic;
 3392 
 3393     /**
 3394      * System DTD identifier
 3395      * @type string
 3396      */
 3397     public $dtdSystem;
 3398 
 3399     public function __construct(
 3400         $name = null,
 3401         $xml = true,
 3402         $modules = array(),
 3403         $tidyModules = array(),
 3404         $aliases = array(),
 3405         $dtd_public = null,
 3406         $dtd_system = null
 3407     ) {
 3408         $this->name         = $name;
 3409         $this->xml          = $xml;
 3410         $this->modules      = $modules;
 3411         $this->tidyModules  = $tidyModules;
 3412         $this->aliases      = $aliases;
 3413         $this->dtdPublic    = $dtd_public;
 3414         $this->dtdSystem    = $dtd_system;
 3415     }
 3416 }
 3417 
 3418 
 3419 
 3420 
 3421 
 3422 class HTMLPurifier_DoctypeRegistry
 3423 {
 3424 
 3425     /**
 3426      * Hash of doctype names to doctype objects.
 3427      * @type array
 3428      */
 3429     protected $doctypes;
 3430 
 3431     /**
 3432      * Lookup table of aliases to real doctype names.
 3433      * @type array
 3434      */
 3435     protected $aliases;
 3436 
 3437     /**
 3438      * Registers a doctype to the registry
 3439      * @note Accepts a fully-formed doctype object, or the
 3440      *       parameters for constructing a doctype object
 3441      * @param string $doctype Name of doctype or literal doctype object
 3442      * @param bool $xml
 3443      * @param array $modules Modules doctype will load
 3444      * @param array $tidy_modules Modules doctype will load for certain modes
 3445      * @param array $aliases Alias names for doctype
 3446      * @param string $dtd_public
 3447      * @param string $dtd_system
 3448      * @return HTMLPurifier_Doctype Editable registered doctype
 3449      */
 3450     public function register(
 3451         $doctype,
 3452         $xml = true,
 3453         $modules = array(),
 3454         $tidy_modules = array(),
 3455         $aliases = array(),
 3456         $dtd_public = null,
 3457         $dtd_system = null
 3458     ) {
 3459         if (!is_array($modules)) {
 3460             $modules = array($modules);
 3461         }
 3462         if (!is_array($tidy_modules)) {
 3463             $tidy_modules = array($tidy_modules);
 3464         }
 3465         if (!is_array($aliases)) {
 3466             $aliases = array($aliases);
 3467         }
 3468         if (!is_object($doctype)) {
 3469             $doctype = new HTMLPurifier_Doctype(
 3470                 $doctype,
 3471                 $xml,
 3472                 $modules,
 3473                 $tidy_modules,
 3474                 $aliases,
 3475                 $dtd_public,
 3476                 $dtd_system
 3477             );
 3478         }
 3479         $this->doctypes[$doctype->name] = $doctype;
 3480         $name = $doctype->name;
 3481         // hookup aliases
 3482         foreach ($doctype->aliases as $alias) {
 3483             if (isset($this->doctypes[$alias])) {
 3484                 continue;
 3485             }
 3486             $this->aliases[$alias] = $name;
 3487         }
 3488         // remove old aliases
 3489         if (isset($this->aliases[$name])) {
 3490             unset($this->aliases[$name]);
 3491         }
 3492         return $doctype;
 3493     }
 3494 
 3495     /**
 3496      * Retrieves reference to a doctype of a certain name
 3497      * @note This function resolves aliases
 3498      * @note When possible, use the more fully-featured make()
 3499      * @param string $doctype Name of doctype
 3500      * @return HTMLPurifier_Doctype Editable doctype object
 3501      */
 3502     public function get($doctype)
 3503     {
 3504         if (isset($this->aliases[$doctype])) {
 3505             $doctype = $this->aliases[$doctype];
 3506         }
 3507         if (!isset($this->doctypes[$doctype])) {
 3508             trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
 3509             $anon = new HTMLPurifier_Doctype($doctype);
 3510             return $anon;
 3511         }
 3512         return $this->doctypes[$doctype];
 3513     }
 3514 
 3515     /**
 3516      * Creates a doctype based on a configuration object,
 3517      * will perform initialization on the doctype
 3518      * @note Use this function to get a copy of doctype that config
 3519      *       can hold on to (this is necessary in order to tell
 3520      *       Generator whether or not the current document is XML
 3521      *       based or not).
 3522      * @param HTMLPurifier_Config $config
 3523      * @return HTMLPurifier_Doctype
 3524      */
 3525     public function make($config)
 3526     {
 3527         return clone $this->get($this->getDoctypeFromConfig($config));
 3528     }
 3529 
 3530     /**
 3531      * Retrieves the doctype from the configuration object
 3532      * @param HTMLPurifier_Config $config
 3533      * @return string
 3534      */
 3535     public function getDoctypeFromConfig($config)
 3536     {
 3537         // recommended test
 3538         $doctype = $config->get('HTML.Doctype');
 3539         if (!empty($doctype)) {
 3540             return $doctype;
 3541         }
 3542         $doctype = $config->get('HTML.CustomDoctype');
 3543         if (!empty($doctype)) {
 3544             return $doctype;
 3545         }
 3546         // backwards-compatibility
 3547         if ($config->get('HTML.XHTML')) {
 3548             $doctype = 'XHTML 1.0';
 3549         } else {
 3550             $doctype = 'HTML 4.01';
 3551         }
 3552         if ($config->get('HTML.Strict')) {
 3553             $doctype .= ' Strict';
 3554         } else {
 3555             $doctype .= ' Transitional';
 3556         }
 3557         return $doctype;
 3558     }
 3559 }
 3560 
 3561 
 3562 
 3563 
 3564 
 3565 /**
 3566  * Structure that stores an HTML element definition. Used by
 3567  * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
 3568  * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
 3569  *       Please update that class too.
 3570  * @warning If you add new properties to this class, you MUST update
 3571  *          the mergeIn() method.
 3572  */
 3573 class HTMLPurifier_ElementDef
 3574 {
 3575     /**
 3576      * Does the definition work by itself, or is it created solely
 3577      * for the purpose of merging into another definition?
 3578      * @type bool
 3579      */
 3580     public $standalone = true;
 3581 
 3582     /**
 3583      * Associative array of attribute name to HTMLPurifier_AttrDef.
 3584      * @type array
 3585      * @note Before being processed by HTMLPurifier_AttrCollections
 3586      *       when modules are finalized during
 3587      *       HTMLPurifier_HTMLDefinition->setup(), this array may also
 3588      *       contain an array at index 0 that indicates which attribute
 3589      *       collections to load into the full array. It may also
 3590      *       contain string indentifiers in lieu of HTMLPurifier_AttrDef,
 3591      *       see HTMLPurifier_AttrTypes on how they are expanded during
 3592      *       HTMLPurifier_HTMLDefinition->setup() processing.
 3593      */
 3594     public $attr = array();
 3595 
 3596     // XXX: Design note: currently, it's not possible to override
 3597     // previously defined AttrTransforms without messing around with
 3598     // the final generated config. This is by design; a previous version
 3599     // used an associated list of attr_transform, but it was extremely
 3600     // easy to accidentally override other attribute transforms by
 3601     // forgetting to specify an index (and just using 0.)  While we
 3602     // could check this by checking the index number and complaining,
 3603     // there is a second problem which is that it is not at all easy to
 3604     // tell when something is getting overridden. Combine this with a
 3605     // codebase where this isn't really being used, and it's perfect for
 3606     // nuking.
 3607 
 3608     /**
 3609      * List of tags HTMLPurifier_AttrTransform to be done before validation.
 3610      * @type array
 3611      */
 3612     public $attr_transform_pre = array();
 3613 
 3614     /**
 3615      * List of tags HTMLPurifier_AttrTransform to be done after validation.
 3616      * @type array
 3617      */
 3618     public $attr_transform_post = array();
 3619 
 3620     /**
 3621      * HTMLPurifier_ChildDef of this tag.
 3622      * @type HTMLPurifier_ChildDef
 3623      */
 3624     public $child;
 3625 
 3626     /**
 3627      * Abstract string representation of internal ChildDef rules.
 3628      * @see HTMLPurifier_ContentSets for how this is parsed and then transformed
 3629      * into an HTMLPurifier_ChildDef.
 3630      * @warning This is a temporary variable that is not available after
 3631      *      being processed by HTMLDefinition
 3632      * @type string
 3633      */
 3634     public $content_model;
 3635 
 3636     /**
 3637      * Value of $child->type, used to determine which ChildDef to use,
 3638      * used in combination with $content_model.
 3639      * @warning This must be lowercase
 3640      * @warning This is a temporary variable that is not available after
 3641      *      being processed by HTMLDefinition
 3642      * @type string
 3643      */
 3644     public $content_model_type;
 3645 
 3646     /**
 3647      * Does the element have a content model (#PCDATA | Inline)*? This
 3648      * is important for chameleon ins and del processing in
 3649      * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
 3650      * have to worry about this one.
 3651      * @type bool
 3652      */
 3653     public $descendants_are_inline = false;
 3654 
 3655     /**
 3656      * List of the names of required attributes this element has.
 3657      * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement()
 3658      * @type array
 3659      */
 3660     public $required_attr = array();
 3661 
 3662     /**
 3663      * Lookup table of tags excluded from all descendants of this tag.
 3664      * @type array
 3665      * @note SGML permits exclusions for all descendants, but this is
 3666      *       not possible with DTDs or XML Schemas. W3C has elected to
 3667      *       use complicated compositions of content_models to simulate
 3668      *       exclusion for children, but we go the simpler, SGML-style
 3669      *       route of flat-out exclusions, which correctly apply to
 3670      *       all descendants and not just children. Note that the XHTML
 3671      *       Modularization Abstract Modules are blithely unaware of such
 3672      *       distinctions.
 3673      */
 3674     public $excludes = array();
 3675 
 3676     /**
 3677      * This tag is explicitly auto-closed by the following tags.
 3678      * @type array
 3679      */
 3680     public $autoclose = array();
 3681 
 3682     /**
 3683      * If a foreign element is found in this element, test if it is
 3684      * allowed by this sub-element; if it is, instead of closing the
 3685      * current element, place it inside this element.
 3686      * @type string
 3687      */
 3688     public $wrap;
 3689 
 3690     /**
 3691      * Whether or not this is a formatting element affected by the
 3692      * "Active Formatting Elements" algorithm.
 3693      * @type bool
 3694      */
 3695     public $formatting;
 3696 
 3697     /**
 3698      * Low-level factory constructor for creating new standalone element defs
 3699      */
 3700     public static function create($content_model, $content_model_type, $attr)
 3701     {
 3702         $def = new HTMLPurifier_ElementDef();
 3703         $def->content_model = $content_model;
 3704         $def->content_model_type = $content_model_type;
 3705         $def->attr = $attr;
 3706         return $def;
 3707     }
 3708 
 3709     /**
 3710      * Merges the values of another element definition into this one.
 3711      * Values from the new element def take precedence if a value is
 3712      * not mergeable.
 3713      * @param HTMLPurifier_ElementDef $def
 3714      */
 3715     public function mergeIn($def)
 3716     {
 3717         // later keys takes precedence
 3718         foreach ($def->attr as $k => $v) {
 3719             if ($k === 0) {
 3720                 // merge in the includes
 3721                 // sorry, no way to override an include
 3722                 foreach ($v as $v2) {
 3723                     $this->attr[0][] = $v2;
 3724                 }
 3725                 continue;
 3726             }
 3727             if ($v === false) {
 3728                 if (isset($this->attr[$k])) {
 3729                     unset($this->attr[$k]);
 3730                 }
 3731                 continue;
 3732             }
 3733             $this->attr[$k] = $v;
 3734         }
 3735         $this->_mergeAssocArray($this->excludes, $def->excludes);
 3736         $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
 3737         $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);
 3738 
 3739         if (!empty($def->content_model)) {
 3740             $this->content_model =
 3741                 str_replace("#SUPER", $this->content_model, $def->content_model);
 3742             $this->child = false;
 3743         }
 3744         if (!empty($def->content_model_type)) {
 3745             $this->content_model_type = $def->content_model_type;
 3746             $this->child = false;
 3747         }
 3748         if (!is_null($def->child)) {
 3749             $this->child = $def->child;
 3750         }
 3751         if (!is_null($def->formatting)) {
 3752             $this->formatting = $def->formatting;
 3753         }
 3754         if ($def->descendants_are_inline) {
 3755             $this->descendants_are_inline = $def->descendants_are_inline;
 3756         }
 3757     }
 3758 
 3759     /**
 3760      * Merges one array into another, removes values which equal false
 3761      * @param $a1 Array by reference that is merged into
 3762      * @param $a2 Array that merges into $a1
 3763      */
 3764     private function _mergeAssocArray(&$a1, $a2)
 3765     {
 3766         foreach ($a2 as $k => $v) {
 3767             if ($v === false) {
 3768                 if (isset($a1[$k])) {
 3769                     unset($a1[$k]);
 3770                 }
 3771                 continue;
 3772             }
 3773             $a1[$k] = $v;
 3774         }
 3775     }
 3776 }
 3777 
 3778 
 3779 
 3780 
 3781 
 3782 /**
 3783  * A UTF-8 specific character encoder that handles cleaning and transforming.
 3784  * @note All functions in this class should be static.
 3785  */
 3786 class HTMLPurifier_Encoder
 3787 {
 3788 
 3789     /**
 3790      * Constructor throws fatal error if you attempt to instantiate class
 3791      */
 3792     private function __construct()
 3793     {
 3794         trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
 3795     }
 3796 
    /**
     * Error-handler that mutes errors, alternative to shut-up operator.
     *
     * The body is intentionally empty. unsafeIconv() installs this method
     * with set_error_handler() around its iconv() call and removes it again
     * with restore_error_handler().
     */
    public static function muteErrorHandler()
    {
    }
 3803 
 3804     /**
 3805      * iconv wrapper which mutes errors, but doesn't work around bugs.
 3806      * @param string $in Input encoding
 3807      * @param string $out Output encoding
 3808      * @param string $text The text to convert
 3809      * @return string
 3810      */
 3811     public static function unsafeIconv($in, $out, $text)
 3812     {
 3813         set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
 3814         $r = iconv($in, $out, $text);
 3815         restore_error_handler();
 3816         return $r;
 3817     }
 3818 
 3819     /**
 3820      * iconv wrapper which mutes errors and works around bugs.
 3821      * @param string $in Input encoding
 3822      * @param string $out Output encoding
 3823      * @param string $text The text to convert
 3824      * @param int $max_chunk_size
 3825      * @return string
 3826      */
 3827     public static function iconv($in, $out, $text, $max_chunk_size = 8000)
 3828     {
 3829         $code = self::testIconvTruncateBug();
 3830         if ($code == self::ICONV_OK) {
 3831             return self::unsafeIconv($in, $out, $text);
 3832         } elseif ($code == self::ICONV_TRUNCATES) {
 3833             // we can only work around this if the input character set
 3834             // is utf-8
 3835             if ($in == 'utf-8') {
 3836                 if ($max_chunk_size < 4) {
 3837                     trigger_error('max_chunk_size is too small', E_USER_WARNING);
 3838                     return false;
 3839                 }
 3840                 // split into 8000 byte chunks, but be careful to handle
 3841                 // multibyte boundaries properly
 3842                 if (($c = strlen($text)) <= $max_chunk_size) {
 3843                     return self::unsafeIconv($in, $out, $text);
 3844                 }
 3845                 $r = '';
 3846                 $i = 0;
 3847                 while (true) {
 3848                     if ($i + $max_chunk_size >= $c) {
 3849                         $r .= self::unsafeIconv($in, $out, substr($text, $i));
 3850                         break;
 3851                     }
 3852                     // wibble the boundary
 3853                     if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
 3854                         $chunk_size = $max_chunk_size;
 3855                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
 3856                         $chunk_size = $max_chunk_size - 1;
 3857                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
 3858                         $chunk_size = $max_chunk_size - 2;
 3859                     } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
 3860                         $chunk_size = $max_chunk_size - 3;
 3861                     } else {
 3862                         return false; // rather confusing UTF-8...
 3863                     }
 3864                     $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
 3865                     $r .= self::unsafeIconv($in, $out, $chunk);
 3866                     $i += $chunk_size;
 3867                 }
 3868                 return $r;
 3869             } else {
 3870                 return false;
 3871             }
 3872         } else {
 3873             return false;
 3874         }
 3875     }
 3876 
 3877     /**
 3878      * Cleans a UTF-8 string for well-formedness and SGML validity
 3879      *
 3880      * It will parse according to UTF-8 and return a valid UTF8 string, with
 3881      * non-SGML codepoints excluded.
 3882      *
 3883      * Specifically, it will permit:
 3884      * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
 3885      * Source: https://www.w3.org/TR/REC-xml/#NT-Char
 3886      * Arguably this function should be modernized to the HTML5 set
 3887      * of allowed characters:
 3888      * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
 3889      * which simultaneously expand and restrict the set of allowed characters.
 3890      *
 3891      * @param string $str The string to clean
 3892      * @param bool $force_php
 3893      * @return string
 3894      *
 3895      * @note Just for reference, the non-SGML code points are 0 to 31 and
 3896      *       127 to 159, inclusive.  However, we allow code points 9, 10
 3897      *       and 13, which are the tab, line feed and carriage return
 3898      *       respectively. 128 and above the code points map to multibyte
 3899      *       UTF-8 representations.
 3900      *
 3901      * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
 3902      *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
 3903      *       LGPL license.  Notes on what changed are inside, but in general,
 3904      *       the original code transformed UTF-8 text into an array of integer
 3905      *       Unicode codepoints. Understandably, transforming that back to
 3906      *       a string would be somewhat expensive, so the function was modded to
 3907      *       directly operate on the string.  However, this discourages code
 3908      *       reuse, and the logic enumerated here would be useful for any
 3909      *       function that needs to be able to understand UTF-8 characters.
 3910      *       As of right now, only smart lossless character encoding converters
 3911      *       would need that, and I'm probably not going to implement them.
 3912      */
 3913     public static function cleanUTF8($str, $force_php = false)
 3914     {
 3915         // UTF-8 validity is checked since PHP 4.3.5
 3916         // This is an optimization: if the string is already valid UTF-8, no
 3917         // need to do PHP stuff. 99% of the time, this will be the case.
 3918         if (preg_match(
 3919             '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
 3920             $str
 3921         )) {
 3922             return $str;
 3923         }
 3924 
 3925         $mState = 0; // cached expected number of octets after the current octet
 3926         // until the beginning of the next UTF8 character sequence
 3927         $mUcs4  = 0; // cached Unicode character
 3928         $mBytes = 1; // cached expected number of octets in the current sequence
 3929 
 3930         // original code involved an $out that was an array of Unicode
 3931         // codepoints.  Instead of having to convert back into UTF-8, we've
 3932         // decided to directly append valid UTF-8 characters onto a string
 3933         // $out once they're done.  $char accumulates raw bytes, while $mUcs4
 3934         // turns into the Unicode code point, so there's some redundancy.
 3935 
 3936         $out = '';
 3937         $char = '';
 3938 
 3939         $len = strlen($str);
 3940         for ($i = 0; $i < $len; $i++) {
 3941             $in = ord($str[$i]);
 3942             $char .= $str[$i]; // append byte to char
 3943             if (0 == $mState) {
 3944                 // When mState is zero we expect either a US-ASCII character
 3945                 // or a multi-octet sequence.
 3946                 if (0 == (0x80 & ($in))) {
 3947                     // US-ASCII, pass straight through.
 3948                     if (($in <= 31 || $in == 127) &&
 3949                         !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
 3950                     ) {
 3951                         // control characters, remove
 3952                     } else {
 3953                         $out .= $char;
 3954                     }
 3955                     // reset
 3956                     $char = '';
 3957                     $mBytes = 1;
 3958                 } elseif (0xC0 == (0xE0 & ($in))) {
 3959                     // First octet of 2 octet sequence
 3960                     $mUcs4 = ($in);
 3961                     $mUcs4 = ($mUcs4 & 0x1F) << 6;
 3962                     $mState = 1;
 3963                     $mBytes = 2;
 3964                 } elseif (0xE0 == (0xF0 & ($in))) {
 3965                     // First octet of 3 octet sequence
 3966                     $mUcs4 = ($in);
 3967                     $mUcs4 = ($mUcs4 & 0x0F) << 12;
 3968                     $mState = 2;
 3969                     $mBytes = 3;
 3970                 } elseif (0xF0 == (0xF8 & ($in))) {
 3971                     // First octet of 4 octet sequence
 3972                     $mUcs4 = ($in);
 3973                     $mUcs4 = ($mUcs4 & 0x07) << 18;
 3974                     $mState = 3;
 3975                     $mBytes = 4;
 3976                 } elseif (0xF8 == (0xFC & ($in))) {
 3977                     // First octet of 5 octet sequence.
 3978                     //
 3979                     // This is illegal because the encoded codepoint must be
 3980                     // either:
 3981                     // (a) not the shortest form or
 3982                     // (b) outside the Unicode range of 0-0x10FFFF.
 3983                     // Rather than trying to resynchronize, we will carry on
 3984                     // until the end of the sequence and let the later error
 3985                     // handling code catch it.
 3986                     $mUcs4 = ($in);
 3987                     $mUcs4 = ($mUcs4 & 0x03) << 24;
 3988                     $mState = 4;
 3989                     $mBytes = 5;
 3990                 } elseif (0xFC == (0xFE & ($in))) {
 3991                     // First octet of 6 octet sequence, see comments for 5
 3992                     // octet sequence.
 3993                     $mUcs4 = ($in);
 3994                     $mUcs4 = ($mUcs4 & 1) << 30;
 3995                     $mState = 5;
 3996                     $mBytes = 6;
 3997                 } else {
 3998                     // Current octet is neither in the US-ASCII range nor a
 3999                     // legal first octet of a multi-octet sequence.
 4000                     $mState = 0;
 4001                     $mUcs4  = 0;
 4002                     $mBytes = 1;
 4003                     $char = '';
 4004                 }
 4005             } else {
 4006                 // When mState is non-zero, we expect a continuation of the
 4007                 // multi-octet sequence
 4008                 if (0x80 == (0xC0 & ($in))) {
 4009                     // Legal continuation.
 4010                     $shift = ($mState - 1) * 6;
 4011                     $tmp = $in;
 4012                     $tmp = ($tmp & 0x0000003F) << $shift;
 4013                     $mUcs4 |= $tmp;
 4014 
 4015                     if (0 == --$mState) {
 4016                         // End of the multi-octet sequence. mUcs4 now contains
 4017                         // the final Unicode codepoint to be output
 4018 
 4019                         // Check for illegal sequences and codepoints.
 4020 
 4021                         // From Unicode 3.1, non-shortest form is illegal
 4022                         if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
 4023                             ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
 4024                             ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
 4025                             (4 < $mBytes) ||
 4026                             // From Unicode 3.2, surrogate characters = illegal
 4027                             (($mUcs4 & 0xFFFFF800) == 0xD800) ||
 4028                             // Codepoints outside the Unicode range are illegal
 4029                             ($mUcs4 > 0x10FFFF)
 4030                         ) {
 4031 
 4032                         } elseif (0xFEFF != $mUcs4 && // omit BOM
 4033                             // check for valid Char unicode codepoints
 4034                             (
 4035                                 0x9 == $mUcs4 ||
 4036                                 0xA == $mUcs4 ||
 4037                                 0xD == $mUcs4 ||
 4038                                 (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
 4039                                 // 7F-9F is not strictly prohibited by XML,
 4040                                 // but it is non-SGML, and thus we don't allow it
 4041                                 (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
 4042                                 (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
 4043                                 (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
 4044                             )
 4045                         ) {
 4046                             $out .= $char;
 4047                         }
 4048                         // initialize UTF8 cache (reset)
 4049                         $mState = 0;
 4050                         $mUcs4  = 0;
 4051                         $mBytes = 1;
 4052                         $char = '';
 4053                     }
 4054                 } else {
 4055                     // ((0xC0 & (*in) != 0x80) && (mState != 0))
 4056                     // Incomplete multi-octet sequence.
 4057                     // used to result in complete fail, but we'll reset
 4058                     $mState = 0;
 4059                     $mUcs4  = 0;
 4060                     $mBytes = 1;
 4061                     $char ='';
 4062                 }
 4063             }
 4064         }
 4065         return $out;
 4066     }
 4067 
 4068     /**
 4069      * Translates a Unicode codepoint into its corresponding UTF-8 character.
 4070      * @note Based on Feyd's function at
 4071      *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
 4072      *       which is in public domain.
 4073      * @note While we're going to do code point parsing anyway, a good
 4074      *       optimization would be to refuse to translate code points that
 4075      *       are non-SGML characters.  However, this could lead to duplication.
 4076      * @note This is very similar to the unichr function in
 4077      *       maintenance/generate-entity-file.php (although this is superior,
 4078      *       due to its sanity checks).
 4079      */
 4080 
 4081     // +----------+----------+----------+----------+
 4082     // | 33222222 | 22221111 | 111111   |          |
 4083     // | 10987654 | 32109876 | 54321098 | 76543210 | bit
 4084     // +----------+----------+----------+----------+
 4085     // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
 4086     // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
 4087     // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
 4088     // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
 4089     // +----------+----------+----------+----------+
 4090     // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
 4091     // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
 4092     // +----------+----------+----------+----------+
 4093 
 4094     public static function unichr($code)
 4095     {
 4096         if ($code > 1114111 or $code < 0 or
 4097             ($code >= 55296 and $code <= 57343) ) {
 4098             // bits are set outside the "valid" range as defined
 4099             // by UNICODE 4.1.0
 4100             return '';
 4101         }
 4102 
 4103         $x = $y = $z = $w = 0;
 4104         if ($code < 128) {
 4105             // regular ASCII character
 4106             $x = $code;
 4107         } else {
 4108             // set up bits for UTF-8
 4109             $x = ($code & 63) | 128;
 4110             if ($code < 2048) {
 4111                 $y = (($code & 2047) >> 6) | 192;
 4112             } else {
 4113                 $y = (($code & 4032) >> 6) | 128;
 4114                 if ($code < 65536) {
 4115                     $z = (($code >> 12) & 15) | 224;
 4116                 } else {
 4117                     $z = (($code >> 12) & 63) | 128;
 4118                     $w = (($code >> 18) & 7)  | 240;
 4119                 }
 4120             }
 4121         }
 4122         // set up the actual character
 4123         $ret = '';
 4124         if ($w) {
 4125             $ret .= chr($w);
 4126         }
 4127         if ($z) {
 4128             $ret .= chr($z);
 4129         }
 4130         if ($y) {
 4131             $ret .= chr($y);
 4132         }
 4133         $ret .= chr($x);
 4134 
 4135         return $ret;
 4136     }
 4137 
 4138     /**
 4139      * @return bool
 4140      */
 4141     public static function iconvAvailable()
 4142     {
 4143         static $iconv = null;
 4144         if ($iconv === null) {
 4145             $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
 4146         }
 4147         return $iconv;
 4148     }
 4149 
 4150     /**
 4151      * Convert a string to UTF-8 based on configuration.
 4152      * @param string $str The string to convert
 4153      * @param HTMLPurifier_Config $config
 4154      * @param HTMLPurifier_Context $context
 4155      * @return string
 4156      */
 4157     public static function convertToUTF8($str, $config, $context)
 4158     {
 4159         $encoding = $config->get('Core.Encoding');
 4160         if ($encoding === 'utf-8') {
 4161             return $str;
 4162         }
 4163         static $iconv = null;
 4164         if ($iconv === null) {
 4165             $iconv = self::iconvAvailable();
 4166         }
 4167         if ($iconv && !$config->get('Test.ForceNoIconv')) {
 4168             // unaffected by bugs, since UTF-8 support all characters
 4169             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
 4170             if ($str === false) {
 4171                 // $encoding is not a valid encoding
 4172                 trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
 4173                 return '';
 4174             }
 4175             // If the string is bjorked by Shift_JIS or a similar encoding
 4176             // that doesn't support all of ASCII, convert the naughty
 4177             // characters to their true byte-wise ASCII/UTF-8 equivalents.
 4178             $str = strtr($str, self::testEncodingSupportsASCII($encoding));
 4179             return $str;
 4180         } elseif ($encoding === 'iso-8859-1') {
 4181             $str = utf8_encode($str);
 4182             return $str;
 4183         }
 4184         $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
 4185         if ($bug == self::ICONV_OK) {
 4186             trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
 4187         } else {
 4188             trigger_error(
 4189                 'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
 4190                 'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
 4191                 E_USER_ERROR
 4192             );
 4193         }
 4194     }
 4195 
 4196     /**
 4197      * Converts a string from UTF-8 based on configuration.
 4198      * @param string $str The string to convert
 4199      * @param HTMLPurifier_Config $config
 4200      * @param HTMLPurifier_Context $context
 4201      * @return string
 4202      * @note Currently, this is a lossy conversion, with unexpressable
 4203      *       characters being omitted.
 4204      */
 4205     public static function convertFromUTF8($str, $config, $context)
 4206     {
 4207         $encoding = $config->get('Core.Encoding');
 4208         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
 4209             $str = self::convertToASCIIDumbLossless($str);
 4210         }
 4211         if ($encoding === 'utf-8') {
 4212             return $str;
 4213         }
 4214         static $iconv = null;
 4215         if ($iconv === null) {
 4216             $iconv = self::iconvAvailable();
 4217         }
 4218         if ($iconv && !$config->get('Test.ForceNoIconv')) {
 4219             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
 4220             $ascii_fix = self::testEncodingSupportsASCII($encoding);
 4221             if (!$escape && !empty($ascii_fix)) {
 4222                 $clear_fix = array();
 4223                 foreach ($ascii_fix as $utf8 => $native) {
 4224                     $clear_fix[$utf8] = '';
 4225                 }
 4226                 $str = strtr($str, $clear_fix);
 4227             }
 4228             $str = strtr($str, array_flip($ascii_fix));
 4229             // Normal stuff
 4230             $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
 4231             return $str;
 4232         } elseif ($encoding === 'iso-8859-1') {
 4233             $str = utf8_decode($str);
 4234             return $str;
 4235         }
 4236         trigger_error('Encoding not supported', E_USER_ERROR);
 4237         // You might be tempted to assume that the ASCII representation
 4238         // might be OK, however, this is *not* universally true over all
 4239         // encodings.  So we take the conservative route here, rather
 4240         // than forcibly turn on %Core.EscapeNonASCIICharacters
 4241     }
 4242 
 4243     /**
 4244      * Lossless (character-wise) conversion of HTML to ASCII
 4245      * @param string $str UTF-8 string to be converted to ASCII
 4246      * @return string ASCII encoded string with non-ASCII character entity-ized
 4247      * @warning Adapted from MediaWiki, claiming fair use: this is a common
 4248      *       algorithm. If you disagree with this license fudgery,
 4249      *       implement it yourself.
 4250      * @note Uses decimal numeric entities since they are best supported.
 4251      * @note This is a DUMB function: it has no concept of keeping
 4252      *       character entities that the projected character encoding
 4253      *       can allow. We could possibly implement a smart version
 4254      *       but that would require it to also know which Unicode
 4255      *       codepoints the charset supported (not an easy task).
 4256      * @note Sort of with cleanUTF8() but it assumes that $str is
 4257      *       well-formed UTF-8
 4258      */
 4259     public static function convertToASCIIDumbLossless($str)
 4260     {
 4261         $bytesleft = 0;
 4262         $result = '';
 4263         $working = 0;
 4264         $len = strlen($str);
 4265         for ($i = 0; $i < $len; $i++) {
 4266             $bytevalue = ord($str[$i]);
 4267             if ($bytevalue <= 0x7F) { //0xxx xxxx
 4268                 $result .= chr($bytevalue);
 4269                 $bytesleft = 0;
 4270             } elseif ($bytevalue <= 0xBF) { //10xx xxxx
 4271                 $working = $working << 6;
 4272                 $working += ($bytevalue & 0x3F);
 4273                 $bytesleft--;
 4274                 if ($bytesleft <= 0) {
 4275                     $result .= "&#" . $working . ";";
 4276                 }
 4277             } elseif ($bytevalue <= 0xDF) { //110x xxxx
 4278                 $working = $bytevalue & 0x1F;
 4279                 $bytesleft = 1;
 4280             } elseif ($bytevalue <= 0xEF) { //1110 xxxx
 4281                 $working = $bytevalue & 0x0F;
 4282                 $bytesleft = 2;
 4283             } else { //1111 0xxx
 4284                 $working = $bytevalue & 0x07;
 4285                 $bytesleft = 3;
 4286             }
 4287         }
 4288         return $result;
 4289     }
 4290 
 4291     /** No bugs detected in iconv. */
 4292     const ICONV_OK = 0;
 4293 
 4294     /** Iconv truncates output if converting from UTF-8 to another
 4295      *  character set with //IGNORE, and a non-encodable character is found */
 4296     const ICONV_TRUNCATES = 1;
 4297 
 4298     /** Iconv does not support //IGNORE, making it unusable for
 4299      *  transcoding purposes */
 4300     const ICONV_UNUSABLE = 2;
 4301 
 4302     /**
 4303      * glibc iconv has a known bug where it doesn't handle the magic
 4304      * //IGNORE stanza correctly.  In particular, rather than ignore
 4305      * characters, it will return an EILSEQ after consuming some number
 4306      * of characters, and expect you to restart iconv as if it were
 4307      * an E2BIG.  Old versions of PHP did not respect the errno, and
 4308      * returned the fragment, so as a result you would see iconv
 4309      * mysteriously truncating output. We can work around this by
 4310      * manually chopping our input into segments of about 8000
 4311      * characters, as long as PHP ignores the error code.  If PHP starts
 4312      * paying attention to the error code, iconv becomes unusable.
 4313      *
 4314      * @return int Error code indicating severity of bug.
 4315      */
 4316     public static function testIconvTruncateBug()
 4317     {
 4318         static $code = null;
 4319         if ($code === null) {
 4320             // better not use iconv, otherwise infinite loop!
 4321             $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
 4322             if ($r === false) {
 4323                 $code = self::ICONV_UNUSABLE;
 4324             } elseif (($c = strlen($r)) < 9000) {
 4325                 $code = self::ICONV_TRUNCATES;
 4326             } elseif ($c > 9000) {
 4327                 trigger_error(
 4328                     'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
 4329                     'include your iconv version as per phpversion()',
 4330                     E_USER_ERROR
 4331                 );
 4332             } else {
 4333                 $code = self::ICONV_OK;
 4334             }
 4335         }
 4336         return $code;
 4337     }
 4338 
 4339     /**
 4340      * This expensive function tests whether or not a given character
 4341      * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
 4342      * fail this test, and require special processing. Variable width
 4343      * encodings shouldn't ever fail.
 4344      *
 4345      * @param string $encoding Encoding name to test, as per iconv format
 4346      * @param bool $bypass Whether or not to bypass the precompiled arrays.
 4347      * @return Array of UTF-8 characters to their corresponding ASCII,
 4348      *      which can be used to "undo" any overzealous iconv action.
 4349      */
 4350     public static function testEncodingSupportsASCII($encoding, $bypass = false)
 4351     {
 4352         // All calls to iconv here are unsafe, proof by case analysis:
 4353         // If ICONV_OK, no difference.
 4354         // If ICONV_TRUNCATE, all calls involve one character inputs,
 4355         // so bug is not triggered.
 4356         // If ICONV_UNUSABLE, this call is irrelevant
 4357         static $encodings = array();
 4358         if (!$bypass) {
 4359             if (isset($encodings[$encoding])) {
 4360                 return $encodings[$encoding];
 4361             }
 4362             $lenc = strtolower($encoding);
 4363             switch ($lenc) {
 4364                 case 'shift_jis':
 4365                     return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
 4366                 case 'johab':
 4367                     return array("\xE2\x82\xA9" => '\\');
 4368             }
 4369             if (strpos($lenc, 'iso-8859-') === 0) {
 4370                 return array();
 4371             }
 4372         }
 4373         $ret = array();
 4374         if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
 4375             return false;
 4376         }
 4377         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
 4378             $c = chr($i); // UTF-8 char
 4379             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
 4380             if ($r === '' ||
 4381                 // This line is needed for iconv implementations that do not
 4382                 // omit characters that do not exist in the target character set
 4383                 ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
 4384             ) {
 4385                 // Reverse engineer: what's the UTF-8 equiv of this byte
 4386                 // sequence? This assumes that there's no variable width
 4387                 // encoding that doesn't support ASCII.
 4388                 $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
 4389             }
 4390         }
 4391         $encodings[$encoding] = $ret;
 4392         return $ret;
 4393     }
 4394 }
 4395 
 4396 
 4397 
 4398 
 4399 
 4400 /**
 4401  * Object that provides entity lookup table from entity name to character
 4402  */
 4403 class HTMLPurifier_EntityLookup
 4404 {
 4405     /**
 4406      * Assoc array of entity name to character represented.
 4407      * @type array
 4408      */
 4409     public $table;
 4410 
 4411     /**
 4412      * Sets up the entity lookup table from the serialized file contents.
 4413      * @param bool $file
 4414      * @note The serialized contents are versioned, but were generated
 4415      *       using the maintenance script generate_entity_file.php
 4416      * @warning This is not in constructor to help enforce the Singleton
 4417      */
 4418     public function setup($file = false)
 4419     {
 4420         if (!$file) {
 4421             $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
 4422         }
 4423         $this->table = unserialize(file_get_contents($file));
 4424     }
 4425 
 4426     /**
 4427      * Retrieves sole instance of the object.
 4428      * @param bool|HTMLPurifier_EntityLookup $prototype Optional prototype of custom lookup table to overload with.
 4429      * @return HTMLPurifier_EntityLookup
 4430      */
 4431     public static function instance($prototype = false)
 4432     {
 4433         // no references, since PHP doesn't copy unless modified
 4434         static $instance = null;
 4435         if ($prototype) {
 4436             $instance = $prototype;
 4437         } elseif (!$instance) {
 4438             $instance = new HTMLPurifier_EntityLookup();
 4439             $instance->setup();
 4440         }
 4441         return $instance;
 4442     }
 4443 }
 4444 
 4445 
 4446 
 4447 
 4448 
 4449 // if want to implement error collecting here, we'll need to use some sort
 4450 // of global data (probably trigger_error) because it's impossible to pass
 4451 // $config or $context to the callback functions.
 4452 
 4453 /**
 4454  * Handles referencing and derefencing character entities
 4455  */
 4456 class HTMLPurifier_EntityParser
 4457 {
 4458 
 4459     /**
 4460      * Reference to entity lookup table.
 4461      * @type HTMLPurifier_EntityLookup
 4462      */
 4463     protected $_entity_lookup;
 4464 
 4465     /**
 4466      * Callback regex string for entities in text.
 4467      * @type string
 4468      */
 4469     protected $_textEntitiesRegex;
 4470 
 4471     /**
 4472      * Callback regex string for entities in attributes.
 4473      * @type string
 4474      */
 4475     protected $_attrEntitiesRegex;
 4476 
 4477     /**
 4478      * Tests if the beginning of a string is a semi-optional regex
 4479      */
 4480     protected $_semiOptionalPrefixRegex;
 4481 
 4482     public function __construct() {
 4483         // From
 4484         // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
 4485         $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
 4486 
 4487         // NB: three empty captures to put the fourth match in the right
 4488         // place
 4489         $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
 4490 
 4491         $this->_textEntitiesRegex =
 4492             '/&(?:'.
 4493             // hex
 4494             '[#]x([a-fA-F0-9]+);?|'.
 4495             // dec
 4496             '[#]0*(\d+);?|'.
 4497             // string (mandatory semicolon)
 4498             // NB: order matters: match semicolon preferentially
 4499             '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
 4500             // string (optional semicolon)
 4501             "($semi_optional)".
 4502             ')/';
 4503 
 4504         $this->_attrEntitiesRegex =
 4505             '/&(?:'.
 4506             // hex
 4507             '[#]x([a-fA-F0-9]+);?|'.
 4508             // dec
 4509             '[#]0*(\d+);?|'.
 4510             // string (mandatory semicolon)
 4511             // NB: order matters: match semicolon preferentially
 4512             '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
 4513             // string (optional semicolon)
 4514             // don't match if trailing is equals or alphanumeric (URL
 4515             // like)
 4516             "($semi_optional)(?![=;A-Za-z0-9])".
 4517             ')/';
 4518 
 4519     }
 4520 
 4521     /**
 4522      * Substitute entities with the parsed equivalents.  Use this on
 4523      * textual data in an HTML document (as opposed to attributes.)
 4524      *
 4525      * @param string $string String to have entities parsed.
 4526      * @return string Parsed string.
 4527      */
 4528     public function substituteTextEntities($string)
 4529     {
 4530         return preg_replace_callback(
 4531             $this->_textEntitiesRegex,
 4532             array($this, 'entityCallback'),
 4533             $string
 4534         );
 4535     }
 4536 
 4537     /**
 4538      * Substitute entities with the parsed equivalents.  Use this on
 4539      * attribute contents in documents.
 4540      *
 4541      * @param string $string String to have entities parsed.
 4542      * @return string Parsed string.
 4543      */
 4544     public function substituteAttrEntities($string)
 4545     {
 4546         return preg_replace_callback(
 4547             $this->_attrEntitiesRegex,
 4548             array($this, 'entityCallback'),
 4549             $string
 4550         );
 4551     }
 4552 
 4553     /**
 4554      * Callback function for substituteNonSpecialEntities() that does the work.
 4555      *
 4556      * @param array $matches  PCRE matches array, with 0 the entire match, and
 4557      *                  either index 1, 2 or 3 set with a hex value, dec value,
 4558      *                  or string (respectively).
 4559      * @return string Replacement string.
 4560      */
 4561 
 4562     protected function entityCallback($matches)
 4563     {
 4564         $entity = $matches[0];
 4565         $hex_part = @$matches[1];
 4566         $dec_part = @$matches[2];
 4567         $named_part = empty($matches[3]) ? @$matches[4] : $matches[3];
 4568         if ($hex_part !== NULL && $hex_part !== "") {
 4569             return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
 4570         } elseif ($dec_part !== NULL && $dec_part !== "") {
 4571             return HTMLPurifier_Encoder::unichr((int) $dec_part);
 4572         } else {
 4573             if (!$this->_entity_lookup) {
 4574                 $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
 4575             }
 4576             if (isset($this->_entity_lookup->table[$named_part])) {
 4577                 return $this->_entity_lookup->table[$named_part];
 4578             } else {
 4579                 // exact match didn't match anything, so test if
 4580                 // any of the semicolon optional match the prefix.
 4581                 // Test that this is an EXACT match is important to
 4582                 // prevent infinite loop
 4583                 if (!empty($matches[3])) {
 4584                     return preg_replace_callback(
 4585                         $this->_semiOptionalPrefixRegex,
 4586                         array($this, 'entityCallback'),
 4587                         $entity
 4588                     );
 4589                 }
 4590                 return $entity;
 4591             }
 4592         }
 4593     }
 4594 
 4595     // LEGACY CODE BELOW
 4596 
 4597     /**
 4598      * Callback regex string for parsing entities.
 4599      * @type string
 4600      */
 4601     protected $_substituteEntitiesRegex =
 4602         '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
 4603     //     1. hex             2. dec      3. string (XML style)
 4604 
 4605     /**
 4606      * Decimal to parsed string conversion table for special entities.
 4607      * @type array
 4608      */
 4609     protected $_special_dec2str =
 4610         array(
 4611             34 => '"',
 4612             38 => '&',
 4613             39 => "'",
 4614             60 => '<',
 4615             62 => '>'
 4616         );
 4617 
 4618     /**
 4619      * Stripped entity names to decimal conversion table for special entities.
 4620      * @type array
 4621      */
 4622     protected $_special_ent2dec =
 4623         array(
 4624             'quot' => 34,
 4625             'amp'  => 38,
 4626             'lt'   => 60,
 4627             'gt'   => 62
 4628         );
 4629 
 4630     /**
 4631      * Substitutes non-special entities with their parsed equivalents. Since
 4632      * running this whenever you have parsed character is t3h 5uck, we run
 4633      * it before everything else.
 4634      *
 4635      * @param string $string String to have non-special entities parsed.
 4636      * @return string Parsed string.
 4637      */
 4638     public function substituteNonSpecialEntities($string)
 4639     {
 4640         // it will try to detect missing semicolons, but don't rely on it
 4641         return preg_replace_callback(
 4642             $this->_substituteEntitiesRegex,
 4643             array($this, 'nonSpecialEntityCallback'),
 4644             $string
 4645         );
 4646     }
 4647 
 4648     /**
 4649      * Callback function for substituteNonSpecialEntities() that does the work.
 4650      *
 4651      * @param array $matches  PCRE matches array, with 0 the entire match, and
 4652      *                  either index 1, 2 or 3 set with a hex value, dec value,
 4653      *                  or string (respectively).
 4654      * @return string Replacement string.
 4655      */
 4656 
 4657     protected function nonSpecialEntityCallback($matches)
 4658     {
 4659         // replaces all but big five
 4660         $entity = $matches[0];
 4661         $is_num = (@$matches[0][1] === '#');
 4662         if ($is_num) {
 4663             $is_hex = (@$entity[2] === 'x');
 4664             $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
 4665             // abort for special characters
 4666             if (isset($this->_special_dec2str[$code])) {
 4667                 return $entity;
 4668             }
 4669             return HTMLPurifier_Encoder::unichr($code);
 4670         } else {
 4671             if (isset($this->_special_ent2dec[$matches[3]])) {
 4672                 return $entity;
 4673             }
 4674             if (!$this->_entity_lookup) {
 4675                 $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
 4676             }
 4677             if (isset($this->_entity_lookup->table[$matches[3]])) {
 4678                 return $this->_entity_lookup->table[$matches[3]];
 4679             } else {
 4680                 return $entity;
 4681             }
 4682         }
 4683     }
 4684 
 4685     /**
 4686      * Substitutes only special entities with their parsed equivalents.
 4687      *
 4688      * @notice We try to avoid calling this function because otherwise, it
 4689      * would have to be called a lot (for every parsed section).
 4690      *
 4691      * @param string $string String to have non-special entities parsed.
 4692      * @return string Parsed string.
 4693      */
 4694     public function substituteSpecialEntities($string)
 4695     {
 4696         return preg_replace_callback(
 4697             $this->_substituteEntitiesRegex,
 4698             array($this, 'specialEntityCallback'),
 4699             $string
 4700         );
 4701     }
 4702 
 4703     /**
 4704      * Callback function for substituteSpecialEntities() that does the work.
 4705      *
 4706      * This callback has same syntax as nonSpecialEntityCallback().
 4707      *
 4708      * @param array $matches  PCRE-style matches array, with 0 the entire match, and
 4709      *                  either index 1, 2 or 3 set with a hex value, dec value,
 4710      *                  or string (respectively).
 4711      * @return string Replacement string.
 4712      */
 4713     protected function specialEntityCallback($matches)
 4714     {
 4715         $entity = $matches[0];
 4716         $is_num = (@$matches[0][1] === '#');
 4717         if ($is_num) {
 4718             $is_hex = (@$entity[2] === 'x');
 4719             $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
 4720             return isset($this->_special_dec2str[$int]) ?
 4721                 $this->_special_dec2str[$int] :
 4722                 $entity;
 4723         } else {
 4724             return isset($this->_special_ent2dec[$matches[3]]) ?
 4725                 $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
 4726                 $entity;
 4727         }
 4728     }
 4729 }
 4730 
 4731 
 4732 
 4733 
 4734 
 4735 /**
 4736  * Error collection class that enables HTML Purifier to report HTML
 4737  * problems back to the user
 4738  */
 4739 class HTMLPurifier_ErrorCollector
 4740 {
 4741 
 4742     /**
 4743      * Identifiers for the returned error array. These are purposely numeric
 4744      * so list() can be used.
 4745      */
 4746     const LINENO   = 0;
 4747     const SEVERITY = 1;
 4748     const MESSAGE  = 2;
 4749     const CHILDREN = 3;
 4750 
 4751     /**
 4752      * @type array
 4753      */
 4754     protected $errors;
 4755 
 4756     /**
 4757      * @type array
 4758      */
 4759     protected $_current;
 4760 
 4761     /**
 4762      * @type array
 4763      */
 4764     protected $_stacks = array(array());
 4765 
 4766     /**
 4767      * @type HTMLPurifier_Language
 4768      */
 4769     protected $locale;
 4770 
 4771     /**
 4772      * @type HTMLPurifier_Generator
 4773      */
 4774     protected $generator;
 4775 
 4776     /**
 4777      * @type HTMLPurifier_Context
 4778      */
 4779     protected $context;
 4780 
 4781     /**
 4782      * @type array
 4783      */
 4784     protected $lines = array();
 4785 
 4786     /**
 4787      * @param HTMLPurifier_Context $context
 4788      */
 4789     public function __construct($context)
 4790     {
 4791         $this->locale    =& $context->get('Locale');
 4792         $this->context   = $context;
 4793         $this->_current  =& $this->_stacks[0];
 4794         $this->errors    =& $this->_stacks[0];
 4795     }
 4796 
 4797     /**
 4798      * Sends an error message to the collector for later use
 4799      * @param int $severity Error severity, PHP error style (don't use E_USER_)
 4800      * @param string $msg Error message text
 4801      */
 4802     public function send($severity, $msg)
 4803     {
 4804         $args = array();
 4805         if (func_num_args() > 2) {
 4806             $args = func_get_args();
 4807             array_shift($args);
 4808             unset($args[0]);
 4809         }
 4810 
 4811         $token = $this->context->get('CurrentToken', true);
 4812         $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
 4813         $col   = $token ? $token->col  : $this->context->get('CurrentCol', true);
 4814         $attr  = $this->context->get('CurrentAttr', true);
 4815 
 4816         // perform special substitutions, also add custom parameters
 4817         $subst = array();
 4818         if (!is_null($token)) {
 4819             $args['CurrentToken'] = $token;
 4820         }
 4821         if (!is_null($attr)) {
 4822             $subst['$CurrentAttr.Name'] = $attr;
 4823             if (isset($token->attr[$attr])) {
 4824                 $subst['$CurrentAttr.Value'] = $token->attr[$attr];
 4825             }
 4826         }
 4827 
 4828         if (empty($args)) {
 4829             $msg = $this->locale->getMessage($msg);
 4830         } else {
 4831             $msg = $this->locale->formatMessage($msg, $args);
 4832         }
 4833 
 4834         if (!empty($subst)) {
 4835             $msg = strtr($msg, $subst);
 4836         }
 4837 
 4838         // (numerically indexed)
 4839         $error = array(
 4840             self::LINENO   => $line,
 4841             self::SEVERITY => $severity,
 4842             self::MESSAGE  => $msg,
 4843             self::CHILDREN => array()
 4844         );
 4845         $this->_current[] = $error;
 4846 
 4847         // NEW CODE BELOW ...
 4848         // Top-level errors are either:
 4849         //  TOKEN type, if $value is set appropriately, or
 4850         //  "syntax" type, if $value is null
 4851         $new_struct = new HTMLPurifier_ErrorStruct();
 4852         $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
 4853         if ($token) {
 4854             $new_struct->value = clone $token;
 4855         }
 4856         if (is_int($line) && is_int($col)) {
 4857             if (isset($this->lines[$line][$col])) {
 4858                 $struct = $this->lines[$line][$col];
 4859             } else {
 4860                 $struct = $this->lines[$line][$col] = $new_struct;
 4861             }
 4862             // These ksorts may present a performance problem
 4863             ksort($this->lines[$line], SORT_NUMERIC);
 4864         } else {
 4865             if (isset($this->lines[-1])) {
 4866                 $struct = $this->lines[-1];
 4867             } else {
 4868                 $struct = $this->lines[-1] = $new_struct;
 4869             }
 4870         }
 4871         ksort($this->lines, SORT_NUMERIC);
 4872 
 4873         // Now, check if we need to operate on a lower structure
 4874         if (!empty($attr)) {
 4875             $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
 4876             if (!$struct->value) {
 4877                 $struct->value = array($attr, 'PUT VALUE HERE');
 4878             }
 4879         }
 4880         if (!empty($cssprop)) {
 4881             $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
 4882             if (!$struct->value) {
 4883                 // if we tokenize CSS this might be a little more difficult to do
 4884                 $struct->value = array($cssprop, 'PUT VALUE HERE');
 4885             }
 4886         }
 4887 
 4888         // Ok, structs are all setup, now time to register the error
 4889         $struct->addError($severity, $msg);
 4890     }
 4891 
 4892     /**
 4893      * Retrieves raw error data for custom formatter to use
 4894      */
 4895     public function getRaw()
 4896     {
 4897         return $this->errors;
 4898     }
 4899 
 4900     /**
 4901      * Default HTML formatting implementation for error messages
 4902      * @param HTMLPurifier_Config $config Configuration, vital for HTML output nature
 4903      * @param array $errors Errors array to display; used for recursion.
 4904      * @return string
 4905      */
 4906     public function getHTMLFormatted($config, $errors = null)
 4907     {
 4908         $ret = array();
 4909 
 4910         $this->generator = new HTMLPurifier_Generator($config, $this->context);
 4911         if ($errors === null) {
 4912             $errors = $this->errors;
 4913         }
 4914 
 4915         // 'At line' message needs to be removed
 4916 
 4917         // generation code for new structure goes here. It needs to be recursive.
 4918         foreach ($this->lines as $line => $col_array) {
 4919             if ($line == -1) {
 4920                 continue;
 4921             }
 4922             foreach ($col_array as $col => $struct) {
 4923                 $this->_renderStruct($ret, $struct, $line, $col);
 4924             }
 4925         }
 4926         if (isset($this->lines[-1])) {
 4927             $this->_renderStruct($ret, $this->lines[-1]);
 4928         }
 4929 
 4930         if (empty($errors)) {
 4931             return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
 4932         } else {
 4933             return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
 4934         }
 4935 
 4936     }
 4937 
 4938     private function _renderStruct(&$ret, $struct, $line = null, $col = null)
 4939     {
 4940         $stack = array($struct);
 4941         $context_stack = array(array());
 4942         while ($current = array_pop($stack)) {
 4943             $context = array_pop($context_stack);
 4944             foreach ($current->errors as $error) {
 4945                 list($severity, $msg) = $error;
 4946                 $string = '';
 4947                 $string .= '<div>';
 4948                 // W3C uses an icon to indicate the severity of the error.
 4949                 $error = $this->locale->getErrorName($severity);
 4950                 $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
 4951                 if (!is_null($line) && !is_null($col)) {
 4952                     $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
 4953                 } else {
 4954                     $string .= '<em class="location">End of Document: </em> ';
 4955                 }
 4956                 $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
 4957                 $string .= '</div>';
 4958                 // Here, have a marker for the character on the column appropriate.
 4959                 // Be sure to clip extremely long lines.
 4960                 //$string .= '<pre>';
 4961                 //$string .= '';
 4962                 //$string .= '</pre>';
 4963                 $ret[] = $string;
 4964             }
 4965             foreach ($current->children as $array) {
 4966                 $context[] = $current;
 4967                 $stack = array_merge($stack, array_reverse($array, true));
 4968                 for ($i = count($array); $i > 0; $i--) {
 4969                     $context_stack[] = $context;
 4970                 }
 4971             }
 4972         }
 4973     }
 4974 }
 4975 
 4976 
 4977 
 4978 
 4979 
 4980 /**
 4981  * Records errors for particular segments of an HTML document such as tokens,
 4982  * attributes or CSS properties. They can contain error structs (which apply
 4983  * to components of what they represent), but their main purpose is to hold
 4984  * errors applying to whatever struct is being used.
 4985  */
 4986 class HTMLPurifier_ErrorStruct
 4987 {
 4988 
 4989     /**
 4990      * Possible values for $children first-key. Note that top-level structures
 4991      * are automatically token-level.
 4992      */
 4993     const TOKEN     = 0;
 4994     const ATTR      = 1;
 4995     const CSSPROP   = 2;
 4996 
 4997     /**
 4998      * Type of this struct.
 4999      * @type string
 5000      */
 5001     public $type;
 5002 
 5003     /**
 5004      * Value of the struct we are recording errors for. There are various
 5005      * values for this:
 5006      *  - TOKEN: Instance of HTMLPurifier_Token
 5007      *  - ATTR: array('attr-name', 'value')
 5008      *  - CSSPROP: array('prop-name', 'value')
 5009      * @type mixed
 5010      */
 5011     public $value;
 5012 
 5013     /**
 5014      * Errors registered for this structure.
 5015      * @type array
 5016      */
 5017     public $errors = array();
 5018 
 5019     /**
 5020      * Child ErrorStructs that are from this structure. For example, a TOKEN
 5021      * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional
 5022      * array in structure: [TYPE]['identifier']
 5023      * @type array
 5024      */
 5025     public $children = array();
 5026 
 5027     /**
 5028      * @param string $type
 5029      * @param string $id
 5030      * @return mixed
 5031      */
 5032     public function getChild($type, $id)
 5033     {
 5034         if (!isset($this->children[$type][$id])) {
 5035             $this->children[$type][$id] = new HTMLPurifier_ErrorStruct();
 5036             $this->children[$type][$id]->type = $type;
 5037         }
 5038         return $this->children[$type][$id];
 5039     }
 5040 
 5041     /**
 5042      * @param int $severity
 5043      * @param string $message
 5044      */
 5045     public function addError($severity, $message)
 5046     {
 5047         $this->errors[] = array($severity, $message);
 5048     }
 5049 }
 5050 
 5051 
 5052 
 5053 
 5054 
 5055 /**
 5056  * Global exception class for HTML Purifier; any exceptions we throw
 5057  * are from here.
 5058  */
 5059 class HTMLPurifier_Exception extends Exception
 5060 {
 5061 
 5062 }
 5063 
 5064 
 5065 
 5066 
 5067 
 5068 /**
 5069  * Represents a pre or post processing filter on HTML Purifier's output
 5070  *
 5071  * Sometimes, a little ad-hoc fixing of HTML has to be done before
 5072  * it gets sent through HTML Purifier: you can use filters to acheive
 5073  * this effect. For instance, YouTube videos can be preserved using
 5074  * this manner. You could have used a decorator for this task, but
 5075  * PHP's support for them is not terribly robust, so we're going
 5076  * to just loop through the filters.
 5077  *
 5078  * Filters should be exited first in, last out. If there are three filters,
 5079  * named 1, 2 and 3, the order of execution should go 1->preFilter,
 5080  * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
 5081  * 1->postFilter.
 5082  *
 5083  * @note Methods are not declared abstract as it is perfectly legitimate
 5084  *       for an implementation not to want anything to happen on a step
 5085  */
 5086 
 5087 class HTMLPurifier_Filter
 5088 {
 5089 
 5090     /**
 5091      * Name of the filter for identification purposes.
 5092      * @type string
 5093      */
 5094     public $name;
 5095 
 5096     /**
 5097      * Pre-processor function, handles HTML before HTML Purifier
 5098      * @param string $html
 5099      * @param HTMLPurifier_Config $config
 5100      * @param HTMLPurifier_Context $context
 5101      * @return string
 5102      */
 5103     public function preFilter($html, $config, $context)
 5104     {
 5105         return $html;
 5106     }
 5107 
 5108     /**
 5109      * Post-processor function, handles HTML after HTML Purifier
 5110      * @param string $html
 5111      * @param HTMLPurifier_Config $config
 5112      * @param HTMLPurifier_Context $context
 5113      * @return string
 5114      */
 5115     public function postFilter($html, $config, $context)
 5116     {
 5117         return $html;
 5118     }
 5119 }
 5120 
 5121 
 5122 
 5123 
 5124 
 5125 /**
 5126  * Generates HTML from tokens.
 5127  * @todo Refactor interface so that configuration/context is determined
 5128  *       upon instantiation, no need for messy generateFromTokens() calls
 5129  * @todo Make some of the more internal functions protected, and have
 5130  *       unit tests work around that
 5131  */
 5132 class HTMLPurifier_Generator
 5133 {
 5134 
 5135     /**
 5136      * Whether or not generator should produce XML output.
 5137      * @type bool
 5138      */
 5139     private $_xhtml = true;
 5140 
 5141     /**
 5142      * :HACK: Whether or not generator should comment the insides of <script> tags.
 5143      * @type bool
 5144      */
 5145     private $_scriptFix = false;
 5146 
 5147     /**
 5148      * Cache of HTMLDefinition during HTML output to determine whether or
 5149      * not attributes should be minimized.
 5150      * @type HTMLPurifier_HTMLDefinition
 5151      */
 5152     private $_def;
 5153 
 5154     /**
 5155      * Cache of %Output.SortAttr.
 5156      * @type bool
 5157      */
 5158     private $_sortAttr;
 5159 
 5160     /**
 5161      * Cache of %Output.FlashCompat.
 5162      * @type bool
 5163      */
 5164     private $_flashCompat;
 5165 
 5166     /**
 5167      * Cache of %Output.FixInnerHTML.
 5168      * @type bool
 5169      */
 5170     private $_innerHTMLFix;
 5171 
 5172     /**
 5173      * Stack for keeping track of object information when outputting IE
 5174      * compatibility code.
 5175      * @type array
 5176      */
 5177     private $_flashStack = array();
 5178 
 5179     /**
 5180      * Configuration for the generator
 5181      * @type HTMLPurifier_Config
 5182      */
 5183     protected $config;
 5184 
 5185     /**
 5186      * @param HTMLPurifier_Config $config
 5187      * @param HTMLPurifier_Context $context
 5188      */
 5189     public function __construct($config, $context)
 5190     {
 5191         $this->config = $config;
 5192         $this->_scriptFix = $config->get('Output.CommentScriptContents');
 5193         $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
 5194         $this->_sortAttr = $config->get('Output.SortAttr');
 5195         $this->_flashCompat = $config->get('Output.FlashCompat');
 5196         $this->_def = $config->getHTMLDefinition();
 5197         $this->_xhtml = $this->_def->doctype->xml;
 5198     }
 5199 
 5200     /**
 5201      * Generates HTML from an array of tokens.
 5202      * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token
 5203      * @return string Generated HTML
 5204      */
 5205     public function generateFromTokens($tokens)
 5206     {
 5207         if (!$tokens) {
 5208             return '';
 5209         }
 5210 
 5211         // Basic algorithm
 5212         $html = '';
 5213         for ($i = 0, $size = count($tokens); $i < $size; $i++) {
 5214             if ($this->_scriptFix && $tokens[$i]->name === 'script'
 5215                 && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
 5216                 // script special case
 5217                 // the contents of the script block must be ONE token
 5218                 // for this to work.
 5219                 $html .= $this->generateFromToken($tokens[$i++]);
 5220                 $html .= $this->generateScriptFromToken($tokens[$i++]);
 5221             }
 5222             $html .= $this->generateFromToken($tokens[$i]);
 5223         }
 5224 
 5225         // Tidy cleanup
 5226         if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
 5227             $tidy = new Tidy;
 5228             $tidy->parseString(
 5229                 $html,
 5230                 array(
 5231                     'indent'=> true,
 5232                     'output-xhtml' => $this->_xhtml,
 5233                     'show-body-only' => true,
 5234                     'indent-spaces' => 2,
 5235                     'wrap' => 68,
 5236                 ),
 5237                 'utf8'
 5238             );
 5239             $tidy->cleanRepair();
 5240             $html = (string) $tidy; // explicit cast necessary
 5241         }
 5242 
 5243         // Normalize newlines to system defined value
 5244         if ($this->config->get('Core.NormalizeNewlines')) {
 5245             $nl = $this->config->get('Output.Newline');
 5246             if ($nl === null) {
 5247                 $nl = PHP_EOL;
 5248             }
 5249             if ($nl !== "\n") {
 5250                 $html = str_replace("\n", $nl, $html);
 5251             }
 5252         }
 5253         return $html;
 5254     }
 5255 
 5256     /**
 5257      * Generates HTML from a single token.
 5258      * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
 5259      * @return string Generated HTML
 5260      */
 5261     public function generateFromToken($token)
 5262     {
 5263         if (!$token instanceof HTMLPurifier_Token) {
 5264             trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
 5265             return '';
 5266 
 5267         } elseif ($token instanceof HTMLPurifier_Token_Start) {
 5268             $attr = $this->generateAttributes($token->attr, $token->name);
 5269             if ($this->_flashCompat) {
 5270                 if ($token->name == "object") {
 5271                     $flash = new stdClass();
 5272                     $flash->attr = $token->attr;
 5273                     $flash->param = array();
 5274                     $this->_flashStack[] = $flash;
 5275                 }
 5276             }
 5277             return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
 5278 
 5279         } elseif ($token instanceof HTMLPurifier_Token_End) {
 5280             $_extra = '';
 5281             if ($this->_flashCompat) {
 5282                 if ($token->name == "object" && !empty($this->_flashStack)) {
 5283                     // doesn't do anything for now
 5284                 }
 5285             }
 5286             return $_extra . '</' . $token->name . '>';
 5287 
 5288         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
 5289             if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
 5290                 $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
 5291             }
 5292             $attr = $this->generateAttributes($token->attr, $token->name);
 5293             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
 5294                 ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
 5295                 . '>';
 5296 
 5297         } elseif ($token instanceof HTMLPurifier_Token_Text) {
 5298             return $this->escape($token->data, ENT_NOQUOTES);
 5299 
 5300         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
 5301             return '<!--' . $token->data . '-->';
 5302         } else {
 5303             return '';
 5304 
 5305         }
 5306     }
 5307 
 5308     /**
 5309      * Special case processor for the contents of script tags
 5310      * @param HTMLPurifier_Token $token HTMLPurifier_Token object.
 5311      * @return string
 5312      * @warning This runs into problems if there's already a literal
 5313      *          --> somewhere inside the script contents.
 5314      */
 5315     public function generateScriptFromToken($token)
 5316     {
 5317         if (!$token instanceof HTMLPurifier_Token_Text) {
 5318             return $this->generateFromToken($token);
 5319         }
 5320         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
 5321         $data = preg_replace('#//\s*$#', '', $token->data);
 5322         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
 5323     }
 5324 
 5325     /**
 5326      * Generates attribute declarations from attribute array.
 5327      * @note This does not include the leading or trailing space.
 5328      * @param array $assoc_array_of_attributes Attribute array
 5329      * @param string $element Name of element attributes are for, used to check
 5330      *        attribute minimization.
 5331      * @return string Generated HTML fragment for insertion.
 5332      */
 5333     public function generateAttributes($assoc_array_of_attributes, $element = '')
 5334     {
 5335         $html = '';
 5336         if ($this->_sortAttr) {
 5337             ksort($assoc_array_of_attributes);
 5338         }
 5339         foreach ($assoc_array_of_attributes as $key => $value) {
 5340             if (!$this->_xhtml) {
 5341                 // Remove namespaced attributes
 5342                 if (strpos($key, ':') !== false) {
 5343                     continue;
 5344                 }
 5345                 // Check if we should minimize the attribute: val="val" -> val
 5346                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
 5347                     $html .= $key . ' ';
 5348                     continue;
 5349                 }
 5350             }
 5351             // Workaround for Internet Explorer innerHTML bug.
 5352             // Essentially, Internet Explorer, when calculating
 5353             // innerHTML, omits quotes if there are no instances of
 5354             // angled brackets, quotes or spaces.  However, when parsing
 5355             // HTML (for example, when you assign to innerHTML), it
 5356             // treats backticks as quotes.  Thus,
 5357             //      <img alt="``" />
 5358             // becomes
 5359             //      <img alt=`` />
 5360             // becomes
 5361             //      <img alt='' />
 5362             // Fortunately, all we need to do is trigger an appropriate
 5363             // quoting style, which we do by adding an extra space.
 5364             // This also is consistent with the W3C spec, which states
 5365             // that user agents may ignore leading or trailing
 5366             // whitespace (in fact, most don't, at least for attributes
 5367             // like alt, but an extra space at the end is barely
 5368             // noticeable).  Still, we have a configuration knob for
 5369             // this, since this transformation is not necesary if you
 5370             // don't process user input with innerHTML or you don't plan
 5371             // on supporting Internet Explorer.
 5372             if ($this->_innerHTMLFix) {
 5373                 if (strpos($value, '`') !== false) {
 5374                     // check if correct quoting style would not already be
 5375                     // triggered
 5376                     if (strcspn($value, '"\' <>') === strlen($value)) {
 5377                         // protect!
 5378                         $value .= ' ';
 5379                     }
 5380                 }
 5381             }
 5382             $html .= $key.'="'.$this->escape($value).'" ';
 5383         }
 5384         return rtrim($html);
 5385     }
 5386 
 5387     /**
 5388      * Escapes raw text data.
 5389      * @todo This really ought to be protected, but until we have a facility
 5390      *       for properly generating HTML here w/o using tokens, it stays
 5391      *       public.
 5392      * @param string $string String data to escape for HTML.
 5393      * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
 5394      *               permissible for non-attribute output.
 5395      * @return string escaped data.
 5396      */
 5397     public function escape($string, $quote = null)
 5398     {
 5399         // Workaround for APC bug on Mac Leopard reported by sidepodcast
 5400         // http://htmlpurifier.org/phorum/read.php?3,4823,4846
 5401         if ($quote === null) {
 5402             $quote = ENT_COMPAT;
 5403         }
 5404         return htmlspecialchars($string, $quote, 'UTF-8');
 5405     }
 5406 }
 5407 
 5408 
 5409 
 5410 
 5411 
 5412 /**
 5413  * Definition of the purified HTML that describes allowed children,
 5414  * attributes, and many other things.
 5415  *
 5416  * Conventions:
 5417  *
 5418  * All member variables that are prefixed with info
 5419  * (including the main $info array) are used by HTML Purifier internals
 5420  * and should not be directly edited when customizing the HTMLDefinition.
 5421  * They can usually be set via configuration directives or custom
 5422  * modules.
 5423  *
 5424  * On the other hand, member variables without the info prefix are used
 5425  * internally by the HTMLDefinition and MUST NOT be used by other HTML
 5426  * Purifier internals. Many of them, however, are public, and may be
 5427  * edited by userspace code to tweak the behavior of HTMLDefinition.
 5428  *
 5429  * @note This class is inspected by Printer_HTMLDefinition; please
 5430  *       update that class if things here change.
 5431  *
 5432  * @warning Directives that change this object's structure must be in
 5433  *          the HTML or Attr namespace!
 5434  */
 5435 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
 5436 {
 5437 
 5438     // FULLY-PUBLIC VARIABLES ---------------------------------------------
 5439 
 5440     /**
 5441      * Associative array of element names to HTMLPurifier_ElementDef.
 5442      * @type HTMLPurifier_ElementDef[]
 5443      */
 5444     public $info = array();
 5445 
 5446     /**
 5447      * Associative array of global attribute name to attribute definition.
 5448      * @type array
 5449      */
 5450     public $info_global_attr = array();
 5451 
 5452     /**
 5453      * String name of parent element HTML will be going into.
 5454      * @type string
 5455      */
 5456     public $info_parent = 'div';
 5457 
 5458     /**
 5459      * Definition for parent element, allows parent element to be a
 5460      * tag that's not allowed inside the HTML fragment.
 5461      * @type HTMLPurifier_ElementDef
 5462      */
 5463     public $info_parent_def;
 5464 
 5465     /**
 5466      * String name of element used to wrap inline elements in block context.
 5467      * @type string
 5468      * @note This is rarely used except for BLOCKQUOTEs in strict mode
 5469      */
 5470     public $info_block_wrapper = 'p';
 5471 
 5472     /**
 5473      * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
 5474      * @type array
 5475      */
 5476     public $info_tag_transform = array();
 5477 
 5478     /**
 5479      * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
 5480      * @type HTMLPurifier_AttrTransform[]
 5481      */
 5482     public $info_attr_transform_pre = array();
 5483 
 5484     /**
 5485      * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
 5486      * @type HTMLPurifier_AttrTransform[]
 5487      */
 5488     public $info_attr_transform_post = array();
 5489 
 5490     /**
 5491      * Nested lookup array of content set name (Block, Inline) to
 5492      * element name to whether or not it belongs in that content set.
 5493      * @type array
 5494      */
 5495     public $info_content_sets = array();
 5496 
 5497     /**
 5498      * Indexed list of HTMLPurifier_Injector to be used.
 5499      * @type HTMLPurifier_Injector[]
 5500      */
 5501     public $info_injector = array();
 5502 
 5503     /**
 5504      * Doctype object
 5505      * @type HTMLPurifier_Doctype
 5506      */
 5507     public $doctype;
 5508 
 5509 
 5510 
 5511     // RAW CUSTOMIZATION STUFF --------------------------------------------
 5512 
 5513     /**
 5514      * Adds a custom attribute to a pre-existing element
 5515      * @note This is strictly convenience, and does not have a corresponding
 5516      *       method in HTMLPurifier_HTMLModule
 5517      * @param string $element_name Element name to add attribute to
 5518      * @param string $attr_name Name of attribute
 5519      * @param mixed $def Attribute definition, can be string or object, see
 5520      *             HTMLPurifier_AttrTypes for details
 5521      */
 5522     public function addAttribute($element_name, $attr_name, $def)
 5523     {
 5524         $module = $this->getAnonymousModule();
 5525         if (!isset($module->info[$element_name])) {
 5526             $element = $module->addBlankElement($element_name);
 5527         } else {
 5528             $element = $module->info[$element_name];
 5529         }
 5530         $element->attr[$attr_name] = $def;
 5531     }
 5532 
 5533     /**
 5534      * Adds a custom element to your HTML definition
 5535      * @see HTMLPurifier_HTMLModule::addElement() for detailed
 5536      *       parameter and return value descriptions.
 5537      */
 5538     public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
 5539     {
 5540         $module = $this->getAnonymousModule();
 5541         // assume that if the user is calling this, the element
 5542         // is safe. This may not be a good idea
 5543         $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
 5544         return $element;
 5545     }
 5546 
 5547     /**
 5548      * Adds a blank element to your HTML definition, for overriding
 5549      * existing behavior
 5550      * @param string $element_name
 5551      * @return HTMLPurifier_ElementDef
 5552      * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
 5553      *       parameter and return value descriptions.
 5554      */
 5555     public function addBlankElement($element_name)
 5556     {
 5557         $module  = $this->getAnonymousModule();
 5558         $element = $module->addBlankElement($element_name);
 5559         return $element;
 5560     }
 5561 
 5562     /**
 5563      * Retrieves a reference to the anonymous module, so you can
 5564      * bust out advanced features without having to make your own
 5565      * module.
 5566      * @return HTMLPurifier_HTMLModule
 5567      */
 5568     public function getAnonymousModule()
 5569     {
 5570         if (!$this->_anonModule) {
 5571             $this->_anonModule = new HTMLPurifier_HTMLModule();
 5572             $this->_anonModule->name = 'Anonymous';
 5573         }
 5574         return $this->_anonModule;
 5575     }
 5576 
 5577     private $_anonModule = null;
 5578 
 5579     // PUBLIC BUT INTERNAL VARIABLES --------------------------------------