"Fossies" - the Fresh Open Source Software Archive

Member "drupal-8.9.10/core/lib/Drupal/Component/Utility/UrlHelper.php" (26 Nov 2020, 15677 Bytes) of package /linux/www/drupal-8.9.10.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "UrlHelper.php" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 9.0.8_vs_9.1.0-rc1.

    1 <?php
    2 
    3 namespace Drupal\Component\Utility;
    4 
    5 /**
    6  * Helper class URL based methods.
    7  *
    8  * @ingroup utility
    9  */
   10 class UrlHelper {
   11 
   12   /**
   13    * The list of allowed protocols.
   14    *
   15    * @var array
   16    */
   17   protected static $allowedProtocols = ['http', 'https'];
   18 
   19   /**
   20    * Parses an array into a valid, rawurlencoded query string.
   21    *
   22    * Function rawurlencode() is RFC3986 compliant, and as a consequence RFC3987
   23    * compliant. The latter defines the required format of "URLs" in HTML5.
   24    * urlencode() is almost the same as rawurlencode(), except that it encodes
   25    * spaces as "+" instead of "%20". This makes its result non compliant to
   26    * RFC3986 and as a consequence non compliant to RFC3987 and as a consequence
   27    * not valid as a "URL" in HTML5.
   28    *
   29    * @param array $query
   30    *   The query parameter array to be processed; for instance,
   31    *   \Drupal::request()->query->all().
   32    * @param string $parent
   33    *   (optional) Internal use only. Used to build the $query array key for
   34    *   nested items. Defaults to an empty string.
   35    *
   36    * @return string
   37    *   A rawurlencoded string which can be used as or appended to the URL query
   38    *   string.
   39    *
   40    * @ingroup php_wrappers
   41    */
   42   public static function buildQuery(array $query, $parent = '') {
   43     $params = [];
   44 
   45     foreach ($query as $key => $value) {
   46       $key = ($parent ? $parent . rawurlencode('[' . $key . ']') : rawurlencode($key));
   47 
   48       // Recurse into children.
   49       if (is_array($value)) {
   50         $params[] = static::buildQuery($value, $key);
   51       }
   52       // If a query parameter value is NULL, only append its key.
   53       elseif (!isset($value)) {
   54         $params[] = $key;
   55       }
   56       else {
   57         // For better readability of paths in query strings, we decode slashes.
   58         $params[] = $key . '=' . str_replace('%2F', '/', rawurlencode($value));
   59       }
   60     }
   61 
   62     return implode('&', $params);
   63   }
   64 
   65   /**
   66    * Filters a URL query parameter array to remove unwanted elements.
   67    *
   68    * @param array $query
   69    *   An array to be processed.
   70    * @param array $exclude
   71    *   (optional) A list of $query array keys to remove. Use "parent[child]" to
   72    *   exclude nested items.
   73    * @param string $parent
   74    *   Internal use only. Used to build the $query array key for nested items.
   75    *
   76    * @return
   77    *   An array containing query parameters.
   78    */
   79   public static function filterQueryParameters(array $query, array $exclude = [], $parent = '') {
   80     // If $exclude is empty, there is nothing to filter.
   81     if (empty($exclude)) {
   82       return $query;
   83     }
   84     elseif (!$parent) {
   85       $exclude = array_flip($exclude);
   86     }
   87 
   88     $params = [];
   89     foreach ($query as $key => $value) {
   90       $string_key = ($parent ? $parent . '[' . $key . ']' : $key);
   91       if (isset($exclude[$string_key])) {
   92         continue;
   93       }
   94 
   95       if (is_array($value)) {
   96         $params[$key] = static::filterQueryParameters($value, $exclude, $string_key);
   97       }
   98       else {
   99         $params[$key] = $value;
  100       }
  101     }
  102 
  103     return $params;
  104   }
  105 
  106   /**
  107    * Parses a URL string into its path, query, and fragment components.
  108    *
  109    * This function splits both internal paths like @code node?b=c#d @endcode and
  110    * external URLs like @code https://example.com/a?b=c#d @endcode into their
  111    * component parts. See
  112    * @link http://tools.ietf.org/html/rfc3986#section-3 RFC 3986 @endlink for an
  113    * explanation of what the component parts are.
  114    *
  115    * Note that, unlike the RFC, when passed an external URL, this function
  116    * groups the scheme, authority, and path together into the path component.
  117    *
  118    * @param string $url
  119    *   The internal path or external URL string to parse.
  120    *
  121    * @return array
  122    *   An associative array containing:
  123    *   - path: The path component of $url. If $url is an external URL, this
  124    *     includes the scheme, authority, and path.
  125    *   - query: An array of query parameters from $url, if they exist.
  126    *   - fragment: The fragment component from $url, if it exists.
  127    *
  128    * @see \Drupal\Core\Utility\LinkGenerator
  129    * @see http://tools.ietf.org/html/rfc3986
  130    *
  131    * @ingroup php_wrappers
  132    */
  133   public static function parse($url) {
  134     $options = [
  135       'path' => NULL,
  136       'query' => [],
  137       'fragment' => '',
  138     ];
  139 
  140     // External URLs: not using parse_url() here, so we do not have to rebuild
  141     // the scheme, host, and path without having any use for it.
  142     // The URL is considered external if it contains the '://' delimiter. Since
  143     // a URL can also be passed as a query argument, we check if this delimiter
  144     // appears in front of the '?' query argument delimiter.
  145     $scheme_delimiter_position = strpos($url, '://');
  146     $query_delimiter_position = strpos($url, '?');
  147     if ($scheme_delimiter_position !== FALSE && ($query_delimiter_position === FALSE || $scheme_delimiter_position < $query_delimiter_position)) {
  148       // Split off the fragment, if any.
  149       if (strpos($url, '#') !== FALSE) {
  150         list($url, $options['fragment']) = explode('#', $url, 2);
  151       }
  152 
  153       // Split off everything before the query string into 'path'.
  154       $parts = explode('?', $url, 2);
  155 
  156       // Don't support URLs without a path, like 'http://'.
  157       list(, $path) = explode('://', $parts[0], 2);
  158       if ($path != '') {
  159         $options['path'] = $parts[0];
  160       }
  161       // If there is a query string, transform it into keyed query parameters.
  162       if (isset($parts[1])) {
  163         parse_str($parts[1], $options['query']);
  164       }
  165     }
  166     // Internal URLs.
  167     else {
  168       // parse_url() does not support relative URLs, so make it absolute. For
  169       // instance, the relative URL "foo/bar:1" isn't properly parsed.
  170       $parts = parse_url('http://example.com/' . $url);
  171       // Strip the leading slash that was just added.
  172       $options['path'] = substr($parts['path'], 1);
  173       if (isset($parts['query'])) {
  174         parse_str($parts['query'], $options['query']);
  175       }
  176       if (isset($parts['fragment'])) {
  177         $options['fragment'] = $parts['fragment'];
  178       }
  179     }
  180 
  181     return $options;
  182   }
  183 
  184   /**
  185    * Encodes a Drupal path for use in a URL.
  186    *
  187    * For aesthetic reasons slashes are not escaped.
  188    *
  189    * @param string $path
  190    *   The Drupal path to encode.
  191    *
  192    * @return string
  193    *   The encoded path.
  194    */
  195   public static function encodePath($path) {
  196     return str_replace('%2F', '/', rawurlencode($path));
  197   }
  198 
  199   /**
  200    * Determines whether a path is external to Drupal.
  201    *
  202    * An example of an external path is http://example.com. If a path cannot be
  203    * assessed by Drupal's menu handler, then we must treat it as potentially
  204    * insecure.
  205    *
  206    * @param string $path
  207    *   The internal path or external URL being linked to, such as "node/34" or
  208    *   "http://example.com/foo".
  209    *
  210    * @return bool
  211    *   TRUE or FALSE, where TRUE indicates an external path.
  212    */
  213   public static function isExternal($path) {
  214     $colonpos = strpos($path, ':');
  215     // Some browsers treat \ as / so normalize to forward slashes.
  216     $path = str_replace('\\', '/', $path);
  217     // If the path starts with 2 slashes then it is always considered an
  218     // external URL without an explicit protocol part.
  219     return (strpos($path, '//') === 0)
  220       // Leading control characters may be ignored or mishandled by browsers,
  221       // so assume such a path may lead to an external location. The \p{C}
  222       // character class matches all UTF-8 control, unassigned, and private
  223       // characters.
  224       || (preg_match('/^\p{C}/u', $path) !== 0)
  225       // Avoid calling static::stripDangerousProtocols() if there is any slash
  226       // (/), hash (#) or question_mark (?) before the colon (:) occurrence -
  227       // if any - as this would clearly mean it is not a URL.
  228       || ($colonpos !== FALSE
  229         && !preg_match('![/?#]!', substr($path, 0, $colonpos))
  230         && static::stripDangerousProtocols($path) == $path);
  231   }
  232 
  233   /**
  234    * Determines if an external URL points to this installation.
  235    *
  236    * @param string $url
  237    *   A string containing an external URL, such as "http://example.com/foo".
  238    * @param string $base_url
  239    *   The base URL string to check against, such as "http://example.com/"
  240    *
  241    * @return bool
  242    *   TRUE if the URL has the same domain and base path.
  243    *
  244    * @throws \InvalidArgumentException
  245    *   Exception thrown when a either $url or $bath_url are not fully qualified.
  246    */
  247   public static function externalIsLocal($url, $base_url) {
  248     // Some browsers treat \ as / so normalize to forward slashes.
  249     $url = str_replace('\\', '/', $url);
  250 
  251     // Leading control characters may be ignored or mishandled by browsers, so
  252     // assume such a path may lead to an non-local location. The \p{C} character
  253     // class matches all UTF-8 control, unassigned, and private characters.
  254     if (preg_match('/^\p{C}/u', $url) !== 0) {
  255       return FALSE;
  256     }
  257 
  258     $url_parts = parse_url($url);
  259     $base_parts = parse_url($base_url);
  260 
  261     if (empty($base_parts['host']) || empty($url_parts['host'])) {
  262       throw new \InvalidArgumentException('A path was passed when a fully qualified domain was expected.');
  263     }
  264 
  265     if (!isset($url_parts['path']) || !isset($base_parts['path'])) {
  266       return (!isset($base_parts['path']) || $base_parts['path'] == '/')
  267         && ($url_parts['host'] == $base_parts['host']);
  268     }
  269     else {
  270       // When comparing base paths, we need a trailing slash to make sure a
  271       // partial URL match isn't occurring. Since base_path() always returns
  272       // with a trailing slash, we don't need to add the trailing slash here.
  273       return ($url_parts['host'] == $base_parts['host'] && stripos($url_parts['path'], $base_parts['path']) === 0);
  274     }
  275   }
  276 
  277   /**
  278    * Processes an HTML attribute value and strips dangerous protocols from URLs.
  279    *
  280    * @param string $string
  281    *   The string with the attribute value.
  282    *
  283    * @return string
  284    *   Cleaned up and HTML-escaped version of $string.
  285    */
  286   public static function filterBadProtocol($string) {
  287     // Get the plain text representation of the attribute value (i.e. its
  288     // meaning).
  289     $string = Html::decodeEntities($string);
  290     return Html::escape(static::stripDangerousProtocols($string));
  291   }
  292 
  293   /**
  294    * Gets the allowed protocols.
  295    *
  296    * @return array
  297    *   An array of protocols, for example http, https and irc.
  298    */
  299   public static function getAllowedProtocols() {
  300     return static::$allowedProtocols;
  301   }
  302 
  303   /**
  304    * Sets the allowed protocols.
  305    *
  306    * @param array $protocols
  307    *   An array of protocols, for example http, https and irc.
  308    */
  309   public static function setAllowedProtocols(array $protocols = []) {
  310     static::$allowedProtocols = $protocols;
  311   }
  312 
  313   /**
  314    * Strips dangerous protocols (for example, 'javascript:') from a URI.
  315    *
  316    * This function must be called for all URIs within user-entered input prior
  317    * to being output to an HTML attribute value. It is often called as part of
  318    * \Drupal\Component\Utility\UrlHelper::filterBadProtocol() or
  319    * \Drupal\Component\Utility\Xss::filter(), but those functions return an
  320    * HTML-encoded string, so this function can be called independently when the
  321    * output needs to be a plain-text string for passing to functions that will
  322    * call Html::escape() separately. The exact behavior depends on the value:
  323    * - If the value is a well-formed (per RFC 3986) relative URL or
  324    *   absolute URL that does not use a dangerous protocol (like
  325    *   "javascript:"), then the URL remains unchanged. This includes all
  326    *   URLs generated via Url::toString() and UrlGeneratorTrait::url().
  327    * - If the value is a well-formed absolute URL with a dangerous protocol,
  328    *   the protocol is stripped. This process is repeated on the remaining URL
  329    *   until it is stripped down to a safe protocol.
  330    * - If the value is not a well-formed URL, the same sanitization behavior as
  331    *   for well-formed URLs will be invoked, which strips most substrings that
  332    *   precede a ":". The result can be used in URL attributes such as "href"
  333    *   or "src" (only after calling Html::escape() separately), but this may not
  334    *   produce valid HTML (for example, malformed URLs within "href" attributes
  335    *   fail HTML validation). This can be avoided by using
  336    *   Url::fromUri($possibly_not_a_url)->toString(), which either throws an
  337    *   exception or returns a well-formed URL.
  338    *
  339    * @param string $uri
  340    *   A plain-text URI that might contain dangerous protocols.
  341    *
  342    * @return string
  343    *   A plain-text URI stripped of dangerous protocols. As with all plain-text
  344    *   strings, this return value must not be output to an HTML page without
  345    *   being sanitized first. However, it can be passed to functions
  346    *   expecting plain-text strings.
  347    *
  348    * @see \Drupal\Component\Utility\Html::escape()
  349    * @see \Drupal\Core\Url::toString()
  350    * @see \Drupal\Core\Routing\UrlGeneratorTrait::url()
  351    * @see \Drupal\Core\Url::fromUri()
  352    */
  353   public static function stripDangerousProtocols($uri) {
  354     $allowed_protocols = array_flip(static::$allowedProtocols);
  355 
  356     // Iteratively remove any invalid protocol found.
  357     do {
  358       $before = $uri;
  359       $colonpos = strpos($uri, ':');
  360       if ($colonpos > 0) {
  361         // We found a colon, possibly a protocol. Verify.
  362         $protocol = substr($uri, 0, $colonpos);
  363         // If a colon is preceded by a slash, question mark or hash, it cannot
  364         // possibly be part of the URL scheme. This must be a relative URL, which
  365         // inherits the (safe) protocol of the base document.
  366         if (preg_match('![/?#]!', $protocol)) {
  367           break;
  368         }
  369         // Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
  370         // (URI Comparison) scheme comparison must be case-insensitive.
  371         if (!isset($allowed_protocols[strtolower($protocol)])) {
  372           $uri = substr($uri, $colonpos + 1);
  373         }
  374       }
  375     } while ($before != $uri);
  376 
  377     return $uri;
  378   }
  379 
  380   /**
  381    * Verifies the syntax of the given URL.
  382    *
  383    * This function should only be used on actual URLs. It should not be used for
  384    * Drupal menu paths, which can contain arbitrary characters.
  385    * Valid values per RFC 3986.
  386    *
  387    * @param string $url
  388    *   The URL to verify.
  389    * @param bool $absolute
  390    *   Whether the URL is absolute (beginning with a scheme such as "http:").
  391    *
  392    * @return bool
  393    *   TRUE if the URL is in a valid format, FALSE otherwise.
  394    */
  395   public static function isValid($url, $absolute = FALSE) {
  396     if ($absolute) {
  397       return (bool) preg_match("
  398         /^                                                      # Start at the beginning of the text
  399         (?:ftp|https?|feed):\/\/                                # Look for ftp, http, https or feed schemes
  400         (?:                                                     # Userinfo (optional) which is typically
  401           (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*      # a username or a username and password
  402           (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@          # combination
  403         )?
  404         (?:
  405           (?:[a-z0-9\-\.]|%[0-9a-f]{2})+                        # A domain name or a IPv4 address
  406           |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])         # or a well formed IPv6 address
  407         )
  408         (?::[0-9]+)?                                            # Server port number (optional)
  409         (?:[\/|\?]
  410           (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})   # The path and query (optional)
  411         *)?
  412       $/xi", $url);
  413     }
  414     else {
  415       return (bool) preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
  416     }
  417   }
  418 
  419 }