X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=extlib%2FNet%2FURL2.php;h=1d2f7fa6a4dcfdbbf170e5a360e64204ecf4f9e9;hb=c285f80b1830cffd20a28c693d74c59f8c3c39f6;hp=7a654aed8f642e9c1df7e3bd7f82142ef70faaf4;hpb=c2ba727d6154394fe234e9297b430ac69fd24c88;p=quix0rs-gnu-social.git diff --git a/extlib/Net/URL2.php b/extlib/Net/URL2.php index 7a654aed8f..1d2f7fa6a4 100644 --- a/extlib/Net/URL2.php +++ b/extlib/Net/URL2.php @@ -1,44 +1,58 @@ | -// +-----------------------------------------------------------------------+ -// -// $Id: URL2.php,v 1.10 2008/04/26 21:57:08 schmidt Exp $ -// -// Net_URL2 Class (PHP5 Only) - -// This code is released under the BSD License - http://www.opensource.org/licenses/bsd-license.php /** - * @license BSD License + * Net_URL2, a class representing a URL as per RFC 3986. + * + * PHP version 5 + * + * LICENSE: + * + * Copyright (c) 2007-2009, Peytz & Co. A/S + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * * Neither the name of the Net_URL2 nor the names of its contributors may + * be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @category Networking + * @package Net_URL2 + * @author Christian Schmidt + * @copyright 2007-2009 Peytz & Co. A/S + * @license https://spdx.org/licenses/BSD-3-Clause BSD-3-Clause + * @version CVS: $Id$ + * @link https://tools.ietf.org/html/rfc3986 + */ + +/** + * Represents a URL as per RFC 3986. + * + * @category Networking + * @package Net_URL2 + * @author Christian Schmidt + * @copyright 2007-2009 Peytz & Co. A/S + * @license https://spdx.org/licenses/BSD-3-Clause BSD-3-Clause + * @version Release: 2.1.2 + * @link https://pear.php.net/package/Net_URL2 */ class Net_URL2 { @@ -46,149 +60,182 @@ class Net_URL2 * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default * is true. */ - const OPTION_STRICT = 'strict'; + const OPTION_STRICT = 'strict'; /** * Represent arrays in query using PHP's [] notation. Default is true. */ - const OPTION_USE_BRACKETS = 'use_brackets'; + const OPTION_USE_BRACKETS = 'use_brackets'; + + /** + * Drop zero-based integer sequences in query using PHP's [] notation. Default + * is true. + */ + const OPTION_DROP_SEQUENCE = 'drop_sequence'; /** * URL-encode query variable keys. Default is true. */ - const OPTION_ENCODE_KEYS = 'encode_keys'; + const OPTION_ENCODE_KEYS = 'encode_keys'; /** * Query variable separators when parsing the query string. Every character - * is considered a separator. Default is specified by the - * arg_separator.input php.ini setting (this defaults to "&"). + * is considered a separator. Default is "&". */ - const OPTION_SEPARATOR_INPUT = 'input_separator'; + const OPTION_SEPARATOR_INPUT = 'input_separator'; /** * Query variable separator used when generating the query string. Default - * is specified by the arg_separator.output php.ini setting (this defaults - * to "&"). + * is "&". */ const OPTION_SEPARATOR_OUTPUT = 'output_separator'; /** * Default options corresponds to how PHP handles $_GET. */ - private $options = array( + private $_options = array( self::OPTION_STRICT => true, self::OPTION_USE_BRACKETS => true, + self::OPTION_DROP_SEQUENCE => true, self::OPTION_ENCODE_KEYS => true, - self::OPTION_SEPARATOR_INPUT => 'x&', - self::OPTION_SEPARATOR_OUTPUT => 'x&', + self::OPTION_SEPARATOR_INPUT => '&', + self::OPTION_SEPARATOR_OUTPUT => '&', ); /** * @var string|bool */ - private $scheme = false; + private $_scheme = false; /** * @var string|bool */ - private $userinfo = false; + private $_userinfo = false; /** * @var string|bool */ - private $host = false; + private $_host = false; /** - * @var int|bool + * @var string|bool */ - private $port = false; + private $_port = false; /** * @var string */ - private $path = ''; + private $_path = ''; /** * @var string|bool */ - private $query = false; + private $_query = false; /** * @var string|bool */ - private $fragment = false; + private $_fragment = false; /** + * Constructor. + * * @param string $url an absolute or relative URL - * @param array $options + * @param array $options an array of OPTION_xxx constants + * + * @uses self::parseUrl() */ - public function __construct($url, $options = null) + public function __construct($url, array $options = array()) { - $this->setOption(self::OPTION_SEPARATOR_INPUT, - ini_get('arg_separator.input')); - $this->setOption(self::OPTION_SEPARATOR_OUTPUT, - ini_get('arg_separator.output')); - if (is_array($options)) { - foreach ($options as $optionName => $value) { - $this->setOption($optionName); + foreach ($options as $optionName => $value) { + if (array_key_exists($optionName, $this->_options)) { + $this->_options[$optionName] = $value; } } - if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) { - $this->scheme = $reg[1]; - $url = substr($url, strlen($reg[0])); - } + $this->parseUrl($url); + } - if (preg_match('@^//([^/#?]+)@', $url, $reg)) { - $this->setAuthority($reg[1]); - $url = substr($url, strlen($reg[0])); + /** + * Magic Setter. + * + * This method will magically set the value of a private variable ($var) + * with the value passed as the args + * + * @param string $var The private variable to set. + * @param mixed $arg An argument of any type. + * + * @return void + */ + public function __set($var, $arg) + { + $method = 'set' . $var; + if (method_exists($this, $method)) { + $this->$method($arg); } + } - $i = strcspn($url, '?#'); - $this->path = substr($url, 0, $i); - $url = substr($url, $i); - - if (preg_match('@^\?([^#]*)@', $url, $reg)) { - $this->query = $reg[1]; - $url = substr($url, strlen($reg[0])); + /** + * Magic Getter. + * + * This is the magic get method to retrieve the private variable + * that was set by either __set() or it's setter... + * + * @param string $var The property name to retrieve. + * + * @return mixed $this->$var Either a boolean false if the + * property is not set or the value + * of the private property. + */ + public function __get($var) + { + $method = 'get' . $var; + if (method_exists($this, $method)) { + return $this->$method(); } - if ($url) { - $this->fragment = substr($url, 1); - } + return false; } /** * Returns the scheme, e.g. "http" or "urn", or false if there is no * scheme specified, i.e. if this is a relative URL. * - * @return string|bool + * @return string|bool */ public function getScheme() { - return $this->scheme; + return $this->_scheme; } /** - * @param string|bool $scheme + * Sets the scheme, e.g. "http" or "urn". Specify false if there is no + * scheme specified, i.e. if this is a relative URL. * - * @return void - * @see getScheme() + * @param string|bool $scheme e.g. "http" or "urn", or false if there is no + * scheme specified, i.e. if this is a relative + * URL + * + * @return $this + * @see getScheme */ public function setScheme($scheme) { - $this->scheme = $scheme; + $this->_scheme = $scheme; + return $this; } /** * Returns the user part of the userinfo part (the part preceding the first * ":"), or false if there is no userinfo part. * - * @return string|bool + * @return string|bool */ public function getUser() { - return $this->userinfo !== false ? preg_replace('@:.*$@', '', $this->userinfo) : false; + return $this->_userinfo !== false + ? preg_replace('(:.*$)', '', $this->_userinfo) + : false; } /** @@ -197,22 +244,24 @@ class Net_URL2 * contain "@" in front of the hostname) or the userinfo part does not * contain ":". * - * @return string|bool + * @return string|bool */ public function getPassword() { - return $this->userinfo !== false ? substr(strstr($this->userinfo, ':'), 1) : false; + return $this->_userinfo !== false + ? substr(strstr($this->_userinfo, ':'), 1) + : false; } /** * Returns the userinfo part, or false if there is none, i.e. if the * authority part does not contain "@". * - * @return string|bool + * @return string|bool */ public function getUserinfo() { - return $this->userinfo; + return $this->_userinfo; } /** @@ -220,108 +269,136 @@ class Net_URL2 * in the userinfo part as username ":" password. * * @param string|bool $userinfo userinfo or username - * @param string|bool $password + * @param string|bool $password optional password, or false * - * @return void + * @return $this */ public function setUserinfo($userinfo, $password = false) { - $this->userinfo = $userinfo; if ($password !== false) { - $this->userinfo .= ':' . $password; + $userinfo .= ':' . $password; } + + if ($userinfo !== false) { + $userinfo = $this->_encodeData($userinfo); + } + + $this->_userinfo = $userinfo; + return $this; } /** * Returns the host part, or false if there is no authority part, e.g. * relative URLs. * - * @return string|bool + * @return string|bool a hostname, an IP address, or false */ public function getHost() { - return $this->host; + return $this->_host; } /** - * @param string|bool $host + * Sets the host part. Specify false if there is no authority part, e.g. + * relative URLs. * - * @return void + * @param string|bool $host a hostname, an IP address, or false + * + * @return $this */ public function setHost($host) { - $this->host = $host; + $this->_host = $host; + return $this; } /** * Returns the port number, or false if there is no port number specified, * i.e. if the default port is to be used. * - * @return int|bool + * @return string|bool */ public function getPort() { - return $this->port; + return $this->_port; } /** - * @param int|bool $port + * Sets the port number. Specify false if there is no port number specified, + * i.e. if the default port is to be used. * - * @return void + * @param string|bool $port a port number, or false + * + * @return $this */ public function setPort($port) { - $this->port = intval($port); + $this->_port = $port; + return $this; } /** * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or - * false if there is no authority none. + * false if there is no authority. * * @return string|bool */ public function getAuthority() { - if (!$this->host) { + if (false === $this->_host) { return false; } $authority = ''; - if ($this->userinfo !== false) { - $authority .= $this->userinfo . '@'; + if (strlen($this->_userinfo)) { + $authority .= $this->_userinfo . '@'; } - $authority .= $this->host; + $authority .= $this->_host; - if ($this->port !== false) { - $authority .= ':' . $this->port; + if ($this->_port !== false) { + $authority .= ':' . $this->_port; } return $authority; } /** - * @param string|false $authority + * Sets the authority part, i.e. [ userinfo "@" ] host [ ":" port ]. Specify + * false if there is no authority. * - * @return void + * @param string|bool $authority a hostname or an IP address, possibly + * with userinfo prefixed and port number + * appended, e.g. "foo:bar@example.org:81". + * + * @return $this */ public function setAuthority($authority) { - $this->user = false; - $this->pass = false; - $this->host = false; - $this->port = false; - if (preg_match('@^(([^\@]+)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) { - if ($reg[1]) { - $this->userinfo = $reg[2]; - } + $this->_userinfo = false; + $this->_host = false; + $this->_port = false; - $this->host = $reg[3]; - if (isset($reg[5])) { - $this->port = intval($reg[5]); - } + if ('' === $authority) { + $this->_host = $authority; + return $this; } + + if (!preg_match('(^(([^@]*)@)?(.+?)(:(\d*))?$)', $authority, $matches)) { + return $this; + } + + if ($matches[1]) { + $this->_userinfo = $this->_encodeData($matches[2]); + } + + $this->_host = $matches[3]; + + if (isset($matches[5]) && strlen($matches[5])) { + $this->_port = $matches[5]; + } + return $this; } /** @@ -331,113 +408,108 @@ class Net_URL2 */ public function getPath() { - return $this->path; + return $this->_path; } /** - * @param string $path + * Sets the path part (possibly an empty string). * - * @return void + * @param string $path a path + * + * @return $this */ public function setPath($path) { - $this->path = $path; + $this->_path = $path; + return $this; } /** * Returns the query string (excluding the leading "?"), or false if "?" - * isn't present in the URL. + * is not present in the URL. * * @return string|bool - * @see self::getQueryVariables() + * @see getQueryVariables */ public function getQuery() { - return $this->query; + return $this->_query; } /** - * @param string|bool $query + * Sets the query string (excluding the leading "?"). Specify false if "?" + * is not present in the URL. * - * @return void - * @see self::setQueryVariables() + * @param string|bool $query a query string, e.g. "foo=1&bar=2" + * + * @return $this + * @see setQueryVariables */ public function setQuery($query) { - $this->query = $query; + $this->_query = $query; + return $this; } /** - * Returns the fragment name, or false if "#" isn't present in the URL. + * Returns the fragment name, or false if "#" is not present in the URL. * - * @return string|bool + * @return string|bool */ public function getFragment() { - return $this->fragment; + return $this->_fragment; } /** - * @param string|bool $fragment + * Sets the fragment name. Specify false if "#" is not present in the URL. * - * @return void + * @param string|bool $fragment a fragment excluding the leading "#", or + * false + * + * @return $this */ public function setFragment($fragment) { - $this->fragment = $fragment; + $this->_fragment = $fragment; + return $this; } /** * Returns the query string like an array as the variables would appear in - * $_GET in a PHP script. + * $_GET in a PHP script. If the URL does not contain a "?", an empty array + * is returned. * - * @return array + * @return array */ public function getQueryVariables() { - $pattern = '/[' . - preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') . - ']/'; - $parts = preg_split($pattern, $this->query, -1, PREG_SPLIT_NO_EMPTY); + $separator = $this->getOption(self::OPTION_SEPARATOR_INPUT); + $encodeKeys = $this->getOption(self::OPTION_ENCODE_KEYS); + $useBrackets = $this->getOption(self::OPTION_USE_BRACKETS); + $return = array(); - foreach ($parts as $part) { - if (strpos($part, '=') !== false) { - list($key, $value) = explode('=', $part, 2); - } else { - $key = $part; - $value = null; - } + for ($part = strtok($this->_query, $separator); + strlen($part); + $part = strtok($separator) + ) { + list($key, $value) = explode('=', $part, 2) + array(1 => ''); - if ($this->getOption(self::OPTION_ENCODE_KEYS)) { + if ($encodeKeys) { $key = rawurldecode($key); } $value = rawurldecode($value); - if ($this->getOption(self::OPTION_USE_BRACKETS) && - preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) { - - $key = $matches[1]; - $idx = $matches[2]; - - // Ensure is an array - if (empty($return[$key]) || !is_array($return[$key])) { - $return[$key] = array(); - } - - // Add data - if ($idx === '') { + if ($useBrackets) { + $return = $this->_queryArrayByKey($key, $value, $return); + } else { + if (isset($return[$key])) { + $return[$key] = (array) $return[$key]; $return[$key][] = $value; } else { - $return[$key][$idx] = $value; + $return[$key] = $value; } - } elseif (!$this->getOption(self::OPTION_USE_BRACKETS) - && !empty($return[$key]) - ) { - $return[$key] = (array) $return[$key]; - $return[$key][] = $value; - } else { - $return[$key] = $value; } } @@ -445,52 +517,170 @@ class Net_URL2 } /** - * @param array $array (name => value) array + * Parse a single query key=value pair into an existing php array * - * @return void + * @param string $key query-key + * @param string $value query-value + * @param array $array of existing query variables (if any) + * + * @return mixed */ - public function setQueryVariables(array $array) + private function _queryArrayByKey($key, $value, array $array = array()) { - if (!$array) { - $this->query = false; + if (!strlen($key)) { + return $array; + } + + $offset = $this->_queryKeyBracketOffset($key); + if ($offset === false) { + $name = $key; } else { - foreach ($array as $name => $value) { - if ($this->getOption(self::OPTION_ENCODE_KEYS)) { - $name = rawurlencode($name); + $name = substr($key, 0, $offset); + } + + if (!strlen($name)) { + return $array; + } + + if (!$offset) { + // named value + $array[$name] = $value; + } else { + // array + $brackets = substr($key, $offset); + if (!isset($array[$name])) { + $array[$name] = null; + } + $array[$name] = $this->_queryArrayByBrackets( + $brackets, $value, $array[$name] + ); + } + + return $array; + } + + /** + * Parse a key-buffer to place value in array + * + * @param string $buffer to consume all keys from + * @param string $value to be set/add + * @param array $array to traverse and set/add value in + * + * @throws Exception + * @return array + */ + private function _queryArrayByBrackets($buffer, $value, array $array = null) + { + $entry = &$array; + + for ($iteration = 0; strlen($buffer); $iteration++) { + $open = $this->_queryKeyBracketOffset($buffer); + if ($open !== 0) { + // Opening bracket [ must exist at offset 0, if not, there is + // no bracket to parse and the value dropped. + // if this happens in the first iteration, this is flawed, see + // as well the second exception below. + if ($iteration) { + break; } + // @codeCoverageIgnoreStart + throw new Exception( + 'Net_URL2 Internal Error: '. __METHOD__ .'(): ' . + 'Opening bracket [ must exist at offset 0' + ); + // @codeCoverageIgnoreEnd + } - if (is_array($value)) { - foreach ($value as $k => $v) { - $parts[] = $this->getOption(self::OPTION_USE_BRACKETS) - ? sprintf('%s[%s]=%s', $name, $k, $v) - : ($name . '=' . $v); - } - } elseif (!is_null($value)) { - $parts[] = $name . '=' . $value; - } else { - $parts[] = $name; + $close = strpos($buffer, ']', 1); + if (!$close) { + // this error condition should never be reached as this is a + // private method and bracket pairs are checked beforehand. + // See as well the first exception for the opening bracket. + // @codeCoverageIgnoreStart + throw new Exception( + 'Net_URL2 Internal Error: '. __METHOD__ .'(): ' . + 'Closing bracket ] must exist, not found' + ); + // @codeCoverageIgnoreEnd + } + + $index = substr($buffer, 1, $close - 1); + if (strlen($index)) { + $entry = &$entry[$index]; + } else { + if (!is_array($entry)) { + $entry = array(); } + $entry[] = &$new; + $entry = &$new; + unset($new); } - $this->query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT), - $parts); + $buffer = substr($buffer, $close + 1); + } + + $entry = $value; + + return $array; + } + + /** + * Query-key has brackets ("...[]") + * + * @param string $key query-key + * + * @return bool|int offset of opening bracket, false if no brackets + */ + private function _queryKeyBracketOffset($key) + { + if (false !== $open = strpos($key, '[') + and false === strpos($key, ']', $open + 1) + ) { + $open = false; + } + + return $open; + } + + /** + * Sets the query string to the specified variable in the query string. + * + * @param array $array (name => value) array + * + * @return $this + */ + public function setQueryVariables(array $array) + { + if (!$array) { + $this->_query = false; + } else { + $this->_query = $this->buildQuery( + $array, + $this->getOption(self::OPTION_SEPARATOR_OUTPUT) + ); } + return $this; } /** - * @param string $name - * @param mixed $value + * Sets the specified variable in the query string. * - * @return array + * @param string $name variable name + * @param mixed $value variable value + * + * @return $this */ public function setQueryVariable($name, $value) { $array = $this->getQueryVariables(); $array[$name] = $value; $this->setQueryVariables($array); + return $this; } /** - * @param string $name + * Removes the specified variable from the query string. + * + * @param string $name a query string variable, e.g. "foo" in "?foo=1" * * @return void */ @@ -504,98 +694,180 @@ class Net_URL2 /** * Returns a string representation of this URL. * - * @return string + * @return string */ public function getURL() { // See RFC 3986, section 5.3 - $url = ""; + $url = ''; - if ($this->scheme !== false) { - $url .= $this->scheme . ':'; + if ($this->_scheme !== false) { + $url .= $this->_scheme . ':'; } $authority = $this->getAuthority(); - if ($authority !== false) { - $url .= '//' . $authority; + if ($authority === false && strtolower($this->_scheme) === 'file') { + $authority = ''; } - $url .= $this->path; - if ($this->query !== false) { - $url .= '?' . $this->query; + $url .= $this->_buildAuthorityAndPath($authority, $this->_path); + + if ($this->_query !== false) { + $url .= '?' . $this->_query; } - if ($this->fragment !== false) { - $url .= '#' . $this->fragment; + if ($this->_fragment !== false) { + $url .= '#' . $this->_fragment; } - + return $url; } - /** + /** + * Put authority and path together, wrapping authority + * into proper separators/terminators. + * + * @param string|bool $authority authority + * @param string $path path + * + * @return string + */ + private function _buildAuthorityAndPath($authority, $path) + { + if ($authority === false) { + return $path; + } + + $terminator = ($path !== '' && $path[0] !== '/') ? '/' : ''; + + return '//' . $authority . $terminator . $path; + } + + /** + * Returns a string representation of this URL. + * + * @return string + * @link https://php.net/language.oop5.magic#object.tostring + */ + public function __toString() + { + return $this->getURL(); + } + + /** * Returns a normalized string representation of this URL. This is useful * for comparison of URLs. * - * @return string + * @return string */ public function getNormalizedURL() { $url = clone $this; $url->normalize(); - return $url->getUrl(); + return $url->getURL(); } - /** - * Returns a normalized Net_URL2 instance. + /** + * Normalizes the URL + * + * See RFC 3986, Section 6. Normalization and Comparison * - * @return Net_URL2 + * @link https://tools.ietf.org/html/rfc3986#section-6 + * + * @return void */ public function normalize() { - // See RFC 3886, section 6 + // See RFC 3986, section 6 - // Schemes are case-insensitive - if ($this->scheme) { - $this->scheme = strtolower($this->scheme); + // Scheme is case-insensitive + if ($this->_scheme) { + $this->_scheme = strtolower($this->_scheme); } - // Hostnames are case-insensitive - if ($this->host) { - $this->host = strtolower($this->host); + // Hostname is case-insensitive + if ($this->_host) { + $this->_host = strtolower($this->_host); } // Remove default port number for known schemes (RFC 3986, section 6.2.3) - if ($this->port && - $this->scheme && - $this->port == getservbyname($this->scheme, 'tcp')) { - - $this->port = false; + if ('' === $this->_port + || $this->_port + && $this->_scheme + && $this->_port == getservbyname($this->_scheme, 'tcp') + ) { + $this->_port = false; } // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1) - foreach (array('userinfo', 'host', 'path') as $part) { - if ($this->$part) { - $this->$part = preg_replace('/%[0-9a-f]{2}/ie', 'strtoupper("\0")', $this->$part); + // Normalize percentage-encoded unreserved characters (section 6.2.2.2) + $fields = array(&$this->_userinfo, &$this->_host, &$this->_path, + &$this->_query, &$this->_fragment); + foreach ($fields as &$field) { + if ($field !== false) { + $field = $this->_normalize("$field"); } } + unset($field); // Path segment normalization (RFC 3986, section 6.2.2.3) - $this->path = self::removeDotSegments($this->path); + $this->_path = self::removeDotSegments($this->_path); // Scheme based normalization (RFC 3986, section 6.2.3) - if ($this->host && !$this->path) { - $this->path = '/'; + if (false !== $this->_host && '' === $this->_path) { + $this->_path = '/'; + } + + // path should start with '/' if there is authority (section 3.3.) + if (strlen($this->getAuthority()) + && strlen($this->_path) + && $this->_path[0] !== '/' + ) { + $this->_path = '/' . $this->_path; } } + /** + * Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1) + * Normalize percentage-encoded unreserved characters (section 6.2.2.2) + * + * @param string|array $mixed string or array of strings to normalize + * + * @return string|array + * @see normalize + * @see _normalizeCallback() + */ + private function _normalize($mixed) + { + return preg_replace_callback( + '((?:%[0-9a-fA-Z]{2})+)', array($this, '_normalizeCallback'), + $mixed + ); + } + + /** + * Callback for _normalize() of %XX percentage-encodings + * + * @param array $matches as by preg_replace_callback + * + * @return string + * @see normalize + * @see _normalize + * @SuppressWarnings(PHPMD.UnusedPrivateMethod) + */ + private function _normalizeCallback($matches) + { + return self::urlencode(urldecode($matches[0])); + } + /** * Returns whether this instance represents an absolute URL. * - * @return bool + * @return bool */ public function isAbsolute() { - return (bool) $this->scheme; + return (bool) $this->_scheme; } /** @@ -604,71 +876,95 @@ class Net_URL2 * * @param Net_URL2|string $reference relative URL * - * @return Net_URL2 + * @throws Exception + * @return $this */ public function resolve($reference) { - if (is_string($reference)) { + if (!$reference instanceof Net_URL2) { $reference = new self($reference); } - if (!$this->isAbsolute()) { - throw new Exception('Base-URL must be absolute'); + if (!$reference->_isFragmentOnly() && !$this->isAbsolute()) { + throw new Exception( + 'Base-URL must be absolute if reference is not fragment-only' + ); } // A non-strict parser may ignore a scheme in the reference if it is // identical to the base URI's scheme. - if (!$this->getOption(self::OPTION_STRICT) && $reference->scheme == $this->scheme) { - $reference->scheme = false; + if (!$this->getOption(self::OPTION_STRICT) + && $reference->_scheme == $this->_scheme + ) { + $reference->_scheme = false; } $target = new self(''); - if ($reference->scheme !== false) { - $target->scheme = $reference->scheme; + if ($reference->_scheme !== false) { + $target->_scheme = $reference->_scheme; $target->setAuthority($reference->getAuthority()); - $target->path = self::removeDotSegments($reference->path); - $target->query = $reference->query; + $target->_path = self::removeDotSegments($reference->_path); + $target->_query = $reference->_query; } else { $authority = $reference->getAuthority(); if ($authority !== false) { $target->setAuthority($authority); - $target->path = self::removeDotSegments($reference->path); - $target->query = $reference->query; + $target->_path = self::removeDotSegments($reference->_path); + $target->_query = $reference->_query; } else { - if ($reference->path == '') { - $target->path = $this->path; - if ($reference->query !== false) { - $target->query = $reference->query; + if ($reference->_path == '') { + $target->_path = $this->_path; + if ($reference->_query !== false) { + $target->_query = $reference->_query; } else { - $target->query = $this->query; + $target->_query = $this->_query; } } else { - if (substr($reference->path, 0, 1) == '/') { - $target->path = self::removeDotSegments($reference->path); + if (substr($reference->_path, 0, 1) == '/') { + $target->_path = self::removeDotSegments($reference->_path); } else { // Merge paths (RFC 3986, section 5.2.3) - if ($this->host !== false && $this->path == '') { - $target->path = '/' . $this->path; + if ($this->_host !== false && $this->_path == '') { + $target->_path = '/' . $reference->_path; } else { - $i = strrpos($this->path, '/'); + $i = strrpos($this->_path, '/'); if ($i !== false) { - $target->path = substr($this->path, 0, $i + 1); + $target->_path = substr($this->_path, 0, $i + 1); } - $target->path .= $reference->path; + $target->_path .= $reference->_path; } - $target->path = self::removeDotSegments($target->path); + $target->_path = self::removeDotSegments($target->_path); } - $target->query = $reference->query; + $target->_query = $reference->_query; } $target->setAuthority($this->getAuthority()); } - $target->scheme = $this->scheme; + $target->_scheme = $this->_scheme; } - $target->fragment = $reference->fragment; + $target->_fragment = $reference->_fragment; return $target; } + /** + * URL is fragment-only + * + * @SuppressWarnings(PHPMD.UnusedPrivateMethod) + * @return bool + */ + private function _isFragmentOnly() + { + return ( + $this->_fragment !== false + && $this->_query === false + && $this->_path === '' + && $this->_port === false + && $this->_host === false + && $this->_userinfo === false + && $this->_scheme === false + ); + } + /** * Removes dots as described in RFC 3986, section 5.2.4, e.g. * "/foo/../bar/baz" => "/bar/baz" @@ -677,56 +973,81 @@ class Net_URL2 * * @return string a path */ - private static function removeDotSegments($path) + public static function removeDotSegments($path) { + $path = (string) $path; $output = ''; // Make sure not to be trapped in an infinite loop due to a bug in this // method - $j = 0; - while ($path && $j++ < 100) { - // Step A - if (substr($path, 0, 2) == './') { + $loopLimit = 256; + $j = 0; + while ('' !== $path && $j++ < $loopLimit) { + if (substr($path, 0, 2) === './') { + // Step 2.A $path = substr($path, 2); - } elseif (substr($path, 0, 3) == '../') { + } elseif (substr($path, 0, 3) === '../') { + // Step 2.A $path = substr($path, 3); - - // Step B - } elseif (substr($path, 0, 3) == '/./' || $path == '/.') { + } elseif (substr($path, 0, 3) === '/./' || $path === '/.') { + // Step 2.B $path = '/' . substr($path, 3); - - // Step C - } elseif (substr($path, 0, 4) == '/../' || $path == '/..') { - $path = '/' . substr($path, 4); - $i = strrpos($output, '/'); + } elseif (substr($path, 0, 4) === '/../' || $path === '/..') { + // Step 2.C + $path = '/' . substr($path, 4); + $i = strrpos($output, '/'); $output = $i === false ? '' : substr($output, 0, $i); - - // Step D - } elseif ($path == '.' || $path == '..') { + } elseif ($path === '.' || $path === '..') { + // Step 2.D $path = ''; - - // Step E } else { - $i = strpos($path, '/'); - if ($i === 0) { - $i = strpos($path, '/', 1); - } + // Step 2.E + $i = strpos($path, '/', $path[0] === '/'); if ($i === false) { - $i = strlen($path); + $output .= $path; + $path = ''; + break; } $output .= substr($path, 0, $i); $path = substr($path, $i); } } + if ($path !== '') { + $message = sprintf( + 'Unable to remove dot segments; hit loop limit %d (left: %s)', + $j, var_export($path, true) + ); + trigger_error($message, E_USER_WARNING); + } + return $output; } + /** + * Percent-encodes all non-alphanumeric characters except these: _ . - ~ + * Similar to PHP's rawurlencode(), except that it also encodes ~ in PHP + * 5.2.x and earlier. + * + * @param string $string string to encode + * + * @return string + */ + public static function urlencode($string) + { + $encoded = rawurlencode($string); + + // This is only necessary in PHP < 5.3. + $encoded = str_replace('%7E', '~', $encoded); + return $encoded; + } + /** * Returns a Net_URL2 instance representing the canonical URL of the * currently executing PHP script. - * - * @return string + * + * @throws Exception + * @return string */ public static function getCanonical() { @@ -737,13 +1058,13 @@ class Net_URL2 // Begin with a relative URL $url = new self($_SERVER['PHP_SELF']); - $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; - $url->host = $_SERVER['SERVER_NAME']; - $port = intval($_SERVER['SERVER_PORT']); - if ($url->scheme == 'http' && $port != 80 || - $url->scheme == 'https' && $port != 443) { - - $url->port = $port; + $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; + $url->_host = $_SERVER['SERVER_NAME']; + $port = $_SERVER['SERVER_PORT']; + if ($url->_scheme == 'http' && $port != 80 + || $url->_scheme == 'https' && $port != 443 + ) { + $url->_port = $port; } return $url; } @@ -762,7 +1083,8 @@ class Net_URL2 * Returns a Net_URL2 instance representing the URL used to retrieve the * current request. * - * @return Net_URL2 + * @throws Exception + * @return $this */ public static function getRequested() { @@ -773,41 +1095,125 @@ class Net_URL2 // Begin with a relative URL $url = new self($_SERVER['REQUEST_URI']); - $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; + $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; // Set host and possibly port $url->setAuthority($_SERVER['HTTP_HOST']); return $url; } /** - * Sets the specified option. + * Returns the value of the specified option. * - * @param string $optionName a self::OPTION_ constant - * @param mixed $value option value + * @param string $optionName The name of the option to retrieve * - * @return void - * @see self::OPTION_STRICT - * @see self::OPTION_USE_BRACKETS - * @see self::OPTION_ENCODE_KEYS + * @return mixed */ - function setOption($optionName, $value) + public function getOption($optionName) { - if (!array_key_exists($optionName, $this->options)) { - return false; + return isset($this->_options[$optionName]) + ? $this->_options[$optionName] : false; + } + + /** + * A simple version of http_build_query in userland. The encoded string is + * percentage encoded according to RFC 3986. + * + * @param array $data An array, which has to be converted into + * QUERY_STRING. Anything is possible. + * @param string $separator Separator {@link self::OPTION_SEPARATOR_OUTPUT} + * @param string $key For stacked values (arrays in an array). + * + * @return string + */ + protected function buildQuery(array $data, $separator, $key = null) + { + $query = array(); + $drop_names = ( + $this->_options[self::OPTION_DROP_SEQUENCE] === true + && array_keys($data) === array_keys(array_values($data)) + ); + foreach ($data as $name => $value) { + if ($this->getOption(self::OPTION_ENCODE_KEYS) === true) { + $name = rawurlencode($name); + } + if ($key !== null) { + if ($this->getOption(self::OPTION_USE_BRACKETS) === true) { + $drop_names && $name = ''; + $name = $key . '[' . $name . ']'; + } else { + $name = $key; + } + } + if (is_array($value)) { + $query[] = $this->buildQuery($value, $separator, $name); + } else { + $query[] = $name . '=' . rawurlencode($value); + } } - $this->options[$optionName] = $value; + return implode($separator, $query); } /** - * Returns the value of the specified option. + * This method uses a regex to parse the url into the designated parts. * - * @param string $optionName The name of the option to retrieve + * @param string $url URL * - * @return mixed + * @return void + * @uses self::$_scheme, self::setAuthority(), self::$_path, self::$_query, + * self::$_fragment + * @see __construct + */ + protected function parseUrl($url) + { + // The regular expression is copied verbatim from RFC 3986, appendix B. + // The expression does not validate the URL but matches any string. + preg_match( + '(^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?)', + $url, $matches + ); + + // "path" is always present (possibly as an empty string); the rest + // are optional. + $this->_scheme = !empty($matches[1]) ? $matches[2] : false; + $this->setAuthority(!empty($matches[3]) ? $matches[4] : false); + $this->_path = $this->_encodeData($matches[5]); + $this->_query = !empty($matches[6]) + ? $this->_encodeData($matches[7]) + : false + ; + $this->_fragment = !empty($matches[8]) ? $matches[9] : false; + } + + /** + * Encode characters that might have been forgotten to encode when passing + * in an URL. Applied onto Userinfo, Path and Query. + * + * @param string $url URL + * + * @return string + * @see parseUrl + * @see setAuthority + * @link https://pear.php.net/bugs/bug.php?id=20425 + */ + private function _encodeData($url) + { + return preg_replace_callback( + '([\x-\x20\x22\x3C\x3E\x7F-\xFF]+)', + array($this, '_encodeCallback'), $url + ); + } + + /** + * callback for encoding character data + * + * @param array $matches Matches + * + * @return string + * @see _encodeData + * @SuppressWarnings(PHPMD.UnusedPrivateMethod) */ - function getOption($optionName) + private function _encodeCallback(array $matches) { - return isset($this->options[$optionName]) - ? $this->options[$optionName] : false; + return rawurlencode($matches[0]); } }