3 * Net_URL2, a class representing a URL as per RFC 3986.
9 * Copyright (c) 2007-2009, Peytz & Co. A/S
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * * Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * * Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in
20 * the documentation and/or other materials provided with the distribution.
21 * * Neither the name of the Net_URL2 nor the names of its contributors may
22 * be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
26 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
27 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
33 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * @category Networking
39 * @author Christian Schmidt <schmidt@php.net>
40 * @copyright 2007-2009 Peytz & Co. A/S
41 * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
42 * @version CVS: $Id: URL2.php 309223 2011-03-14 14:26:32Z till $
43 * @link http://www.rfc-editor.org/rfc/rfc3986.txt
47 * Represents a URL as per RFC 3986.
49 * @category Networking
51 * @author Christian Schmidt <schmidt@php.net>
52 * @copyright 2007-2009 Peytz & Co. A/S
53 * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
54 * @version Release: @package_version@
55 * @link http://pear.php.net/package/Net_URL2
60 * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default
63 const OPTION_STRICT = 'strict';
66 * Represent arrays in query using PHP's [] notation. Default is true.
68 const OPTION_USE_BRACKETS = 'use_brackets';
71 * URL-encode query variable keys. Default is true.
73 const OPTION_ENCODE_KEYS = 'encode_keys';
76 * Query variable separators when parsing the query string. Every character
77 * is considered a separator. Default is "&".
79 const OPTION_SEPARATOR_INPUT = 'input_separator';
82 * Query variable separator used when generating the query string. Default
85 const OPTION_SEPARATOR_OUTPUT = 'output_separator';
88 * Default options corresponds to how PHP handles $_GET.
90 private $_options = array(
91 self::OPTION_STRICT => true,
92 self::OPTION_USE_BRACKETS => true,
93 self::OPTION_ENCODE_KEYS => true,
94 self::OPTION_SEPARATOR_INPUT => '&',
95 self::OPTION_SEPARATOR_OUTPUT => '&',
101 private $_scheme = false;
106 private $_userinfo = false;
111 private $_host = false;
116 private $_port = false;
126 private $_query = false;
131 private $_fragment = false;
/**
 * Creates a URL object from a string and a set of behaviour options.
 *
 * Only option names that already exist in the default option table are
 * honoured; any other key in $options is ignored.
 *
 * @param string $url     an absolute or relative URL
 * @param array  $options map of OPTION_xxx constants to their values
 *
 * @uses self::parseUrl()
 */
public function __construct($url, array $options = array())
{
    // Keep the recognised option names only, then overlay them on the defaults.
    $recognised     = array_intersect_key($options, $this->_options);
    $this->_options = array_merge($this->_options, $recognised);

    $this->parseUrl($url);
}
/**
 * Magic setter: forwards an assignment to the matching set<Name>() method.
 *
 * If no method named "set" . $var exists, the assignment is ignored.
 *
 * @param string $var The property name being assigned.
 * @param mixed  $arg The value to hand to the setter.
 *
 * @return void
 */
public function __set($var, $arg)
{
    $setter = 'set' . $var;
    if (!method_exists($this, $setter)) {
        return;
    }
    $this->$setter($arg);
}
/**
 * Magic getter: forwards a property read to the matching get<Name>() method.
 *
 * @param string $var The property name to retrieve.
 *
 * @return mixed the getter's return value, or false if no method named
 *               "get" . $var exists
 */
public function __get($var)
{
    $getter = 'get' . $var;

    return method_exists($this, $getter) ? $this->$getter() : false;
}
/**
 * Gives the scheme component (for instance "http" or "urn").
 *
 * @return string|bool the scheme, or false when none is set, i.e. when
 *                     this is a relative URL
 */
public function getScheme()
{
    return $this->_scheme;
}
/**
 * Replaces the scheme component (for instance "http" or "urn").
 *
 * @param string|bool $scheme the new scheme, or false for a relative URL
 *                            that has no scheme
 *
 * @return Net_URL2 this instance
 */
public function setScheme($scheme)
{
    $this->_scheme = $scheme;

    return $this;
}
/**
 * Gives the user name, i.e. the userinfo part with everything from the
 * first ":" onwards stripped.
 *
 * @return string|bool the user name, or false if there is no userinfo part
 */
public function getUser()
{
    if ($this->_userinfo === false) {
        return false;
    }

    return preg_replace('@:.*$@', '', $this->_userinfo);
}
/**
 * Returns the password part of the userinfo part (the part after the first
 * ":").
 *
 * The ":" is located explicitly instead of relying on substr(strstr(...)),
 * whose result for a missing ":" or an empty password depends on the PHP
 * version ('' versus false).
 *
 * @return string|bool the password (possibly an empty string when the
 *                     userinfo ends in ":"), or false if there is no
 *                     userinfo part or it does not contain ":"
 */
public function getPassword()
{
    if ($this->_userinfo === false) {
        return false;
    }

    $colon = strpos($this->_userinfo, ':');
    if ($colon === false) {
        // Userinfo consists of a user name only.
        return false;
    }

    // The cast keeps "user:" => '' on PHP < 8, where substr() returns false
    // when the start offset equals the string length.
    return (string) substr($this->_userinfo, $colon + 1);
}
/**
 * Gives the raw userinfo component as stored.
 *
 * @return string|bool the userinfo, or false if there is none, i.e. the
 *                     authority part does not contain "@"
 */
public function getUserinfo()
{
    return $this->_userinfo;
}
/**
 * Replaces the userinfo component. When a password is supplied, the stored
 * value is the first argument, a ":" and the password.
 *
 * @param string|bool $userinfo userinfo or username
 * @param string|bool $password optional password, or false for none
 *
 * @return Net_URL2 this instance
 */
public function setUserinfo($userinfo, $password = false)
{
    $this->_userinfo = ($password === false)
        ? $userinfo
        : $userinfo . ':' . $password;

    return $this;
}
/**
 * Gives the host component.
 *
 * @return string|bool a hostname, an IP address, or false if there is no
 *                     authority part
 */
public function getHost()
{
    return $this->_host;
}
/**
 * Replaces the host component.
 *
 * @param string|bool $host a hostname, an IP address, or false if there is
 *                          no authority part
 *
 * @return Net_URL2 this instance
 */
public function setHost($host)
{
    $this->_host = $host;

    return $this;
}
/**
 * Gives the port component.
 *
 * @return string|bool the port number, or false if none is specified,
 *                     i.e. the default port is to be used
 */
public function getPort()
{
    return $this->_port;
}
/**
 * Replaces the port component.
 *
 * @param string|bool $port a port number, or false if the default port is
 *                          to be used
 *
 * @return Net_URL2 this instance
 */
public function setPort($port)
{
    $this->_port = $port;

    return $this;
}
/**
 * Assembles the authority component: [ userinfo "@" ] host [ ":" port ].
 *
 * @return string|bool the authority, or false if no host is set
 */
public function getAuthority()
{
    if (!$this->_host) {
        return false;
    }

    $userPrefix = ($this->_userinfo !== false) ? $this->_userinfo . '@' : '';
    $portSuffix = ($this->_port !== false) ? ':' . $this->_port : '';

    return $userPrefix . $this->_host . $portSuffix;
}
/**
 * Sets the authority part, i.e. [ userinfo "@" ] host [ ":" port ]. Specify
 * false if there is no authority.
 *
 * Userinfo, host and port are always reset first; they stay false when the
 * argument cannot be parsed. The host may be a bracketed IP literal such as
 * "[::1]" (RFC 3986, section 3.2.2), which is stored including its brackets.
 *
 * @param string|false $authority a hostname or an IP address, possibly
 *                                with userinfo prefixed and port number
 *                                appended, e.g. "foo:bar@example.org:81".
 *
 * @return Net_URL2 this instance
 */
public function setAuthority($authority)
{
    $this->_userinfo = false;
    $this->_host     = false;
    $this->_port     = false;

    // Host alternative 1: "[" ... "]" IP literal, which itself contains ":".
    // Host alternative 2: any run of characters without ":".
    $pattern = '@^(([^\@]*)\@)?(\[[^\]]*\]|[^:]+)(:(\d*))?$@';

    if (preg_match($pattern, (string) $authority, $reg)) {
        // Group 1 includes the "@", so it is non-empty exactly when userinfo
        // is present (even if the userinfo itself is empty).
        if ($reg[1] !== '') {
            $this->_userinfo = $reg[2];
        }

        $this->_host = $reg[3];
        if (isset($reg[5])) {
            $this->_port = $reg[5];
        }
    }

    return $this;
}
/**
 * Gives the path component.
 *
 * @return string the path (possibly an empty string)
 */
public function getPath()
{
    return $this->_path;
}
/**
 * Replaces the path component.
 *
 * @param string $path a path (possibly an empty string)
 *
 * @return Net_URL2 this instance
 */
public function setPath($path)
{
    $this->_path = $path;

    return $this;
}
/**
 * Gives the raw query string without the leading "?".
 *
 * @return string|bool the query string, or false if the URL has no "?"
 * @see    self::getQueryVariables()
 */
public function getQuery()
{
    return $this->_query;
}
/**
 * Replaces the raw query string (given without the leading "?").
 *
 * @param string|bool $query a query string, e.g. "foo=1&bar=2", or false
 *                           if the URL is to have no "?"
 *
 * @return Net_URL2 this instance
 * @see    self::setQueryVariables()
 */
public function setQuery($query)
{
    $this->_query = $query;

    return $this;
}
/**
 * Gives the fragment name without the leading "#".
 *
 * @return string|bool the fragment, or false if the URL has no "#"
 */
public function getFragment()
{
    return $this->_fragment;
}
/**
 * Replaces the fragment name.
 *
 * @param string|bool $fragment a fragment excluding the leading "#", or
 *                              false if the URL is to have no "#"
 *
 * @return Net_URL2 this instance
 */
public function setFragment($fragment)
{
    $this->_fragment = $fragment;

    return $this;
}
/**
 * Returns the query string like an array as the variables would appear in
 * $_GET in a PHP script. If the URL does not contain a "?", an empty array
 * is returned.
 *
 * Every character of the OPTION_SEPARATOR_INPUT option is treated as a
 * separator. Keys are URL-decoded only when OPTION_ENCODE_KEYS is enabled;
 * values are always URL-decoded. A variable without "=" gets the value ''.
 *
 * @return array
 */
public function getQueryVariables()
{
    $pattern = '/['
        . preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/')
        . ']/';

    // The query is false when the URL has no "?"; cast so preg_split()
    // always receives a string.
    $parts  = preg_split($pattern, (string) $this->_query, -1, PREG_SPLIT_NO_EMPTY);
    $return = array();

    foreach ($parts as $part) {
        if (strpos($part, '=') !== false) {
            list($key, $value) = explode('=', $part, 2);
        } else {
            // No "=": the decoded value is the empty string. Using '' rather
            // than null avoids passing null to rawurldecode(), which is
            // deprecated as of PHP 8.1.
            $key   = $part;
            $value = '';
        }

        if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
            $key = rawurldecode($key);
        }
        $value = rawurldecode($value);

        if ($this->getOption(self::OPTION_USE_BRACKETS) &&
            preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) {

            $key = $matches[1];
            $idx = $matches[2];

            // Ensure is an array
            if (empty($return[$key]) || !is_array($return[$key])) {
                $return[$key] = array();
            }

            // "foo[]" appends, "foo[bar]" sets a named element
            if ($idx === '') {
                $return[$key][] = $value;
            } else {
                $return[$key][$idx] = $value;
            }
        } elseif (!$this->getOption(self::OPTION_USE_BRACKETS)
            && !empty($return[$key])
        ) {
            // Without bracket notation a repeated key collects its values
            $return[$key]   = (array) $return[$key];
            $return[$key][] = $value;
        } else {
            $return[$key] = $value;
        }
    }

    return $return;
}
/**
 * Rebuilds the query string from a (name => value) array. An empty array
 * removes the query altogether (the query becomes false).
 *
 * @param array $array (name => value) array
 *
 * @return Net_URL2 this instance
 */
public function setQueryVariables(array $array)
{
    if ($array) {
        $separator    = $this->getOption(self::OPTION_SEPARATOR_OUTPUT);
        $this->_query = $this->buildQuery($array, $separator);
    } else {
        $this->_query = false;
    }

    return $this;
}
/**
 * Adds or overwrites a single variable in the query string. The query is
 * parsed, modified and rebuilt.
 *
 * @param string $name  variable name
 * @param mixed  $value variable value
 *
 * @return Net_URL2 this instance
 */
public function setQueryVariable($name, $value)
{
    $variables        = $this->getQueryVariables();
    $variables[$name] = $value;
    $this->setQueryVariables($variables);

    return $this;
}
/**
 * Removes the specified variable from the query string. The query is
 * parsed, modified and rebuilt.
 *
 * @param string $name a query string variable, e.g. "foo" in "?foo=1"
 *
 * @return void
 */
public function unsetQueryVariable($name)
{
    $variables = $this->getQueryVariables();
    unset($variables[$name]);
    $this->setQueryVariables($variables);
}
/**
 * Recomposes the URL string from its components.
 *
 * @return string
 */
public function getURL()
{
    // Component recomposition, see RFC 3986, section 5.3
    $pieces = array();

    if ($this->_scheme !== false) {
        $pieces[] = $this->_scheme . ':';
    }

    $authority = $this->getAuthority();
    if ($authority !== false) {
        $pieces[] = '//' . $authority;
    }

    // The path is always emitted, even when it is the empty string.
    $pieces[] = $this->_path;

    if ($this->_query !== false) {
        $pieces[] = '?' . $this->_query;
    }

    if ($this->_fragment !== false) {
        $pieces[] = '#' . $this->_fragment;
    }

    return implode('', $pieces);
}
/**
 * String conversion; yields the same value as getURL().
 *
 * @return string
 * @see    self::getURL()
 */
public function __toString()
{
    $url = $this->getURL();

    return $url;
}
/**
 * Gives the normalized string form of this URL, which is useful when
 * comparing URLs. Normalization is applied to a clone, so this instance is
 * left untouched.
 *
 * @return string
 */
public function getNormalizedURL()
{
    $copy = clone $this;
    $copy->normalize();

    return $copy->getUrl();
}
/**
 * Normalizes this instance in place (RFC 3986, section 6).
 *
 * Steps: lower-case scheme and host, drop the port when it equals the
 * scheme's default according to getservbyname(), upper-case the hex digits
 * of percent-encodings in userinfo, host and path, remove dot segments from
 * the path, and use "/" as the path when a host is set and the path is empty.
 *
 * @return void
 */
public function normalize()
{
    // Schemes are case-insensitive
    if ($this->_scheme) {
        $this->_scheme = strtolower($this->_scheme);
    }

    // Hostnames are case-insensitive
    if ($this->_host) {
        $this->_host = strtolower($this->_host);
    }

    // Remove default port number for known schemes (RFC 3986, section 6.2.3)
    if ($this->_port &&
        $this->_scheme &&
        $this->_port == getservbyname($this->_scheme, 'tcp')) {

        $this->_port = false;
    }

    // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1).
    // A callback is used because the /e pattern modifier no longer exists
    // in PHP 7 and later.
    foreach (array('_userinfo', '_host', '_path') as $part) {
        if ($this->$part) {
            $this->$part = preg_replace_callback(
                '/%[0-9a-f]{2}/i',
                array(__CLASS__, '_upperCasePercentEncoding'),
                $this->$part
            );
        }
    }

    // Path segment normalization (RFC 3986, section 6.2.2.3)
    $this->_path = self::removeDotSegments($this->_path);

    // Scheme based normalization (RFC 3986, section 6.2.3)
    if ($this->_host && !$this->_path) {
        $this->_path = '/';
    }
}

/**
 * preg_replace_callback() helper for normalize(): upper-cases one matched
 * percent-encoding triplet, e.g. "%3a" => "%3A".
 *
 * @param array $matches match array; element 0 is the whole "%xx" triplet
 *
 * @return string
 */
private static function _upperCasePercentEncoding(array $matches)
{
    return strtoupper($matches[0]);
}
/**
 * Tells whether this instance represents an absolute URL, which is decided
 * by the scheme component having a truthy value.
 *
 * @return bool
 */
public function isAbsolute()
{
    return $this->_scheme ? true : false;
}
/**
 * Returns a Net_URL2 instance representing an absolute URL relative to
 * this URL (reference resolution, RFC 3986, section 5.2.2).
 *
 * The object passed as $reference is never modified.
 *
 * @param Net_URL2|string $reference relative URL
 *
 * @return Net_URL2
 * @throws Exception if this (base) URL is not absolute
 */
public function resolve($reference)
{
    if (!$reference instanceof Net_URL2) {
        $reference = new self($reference);
    }
    if (!$this->isAbsolute()) {
        throw new Exception('Base-URL must be absolute');
    }

    // A non-strict parser may ignore a scheme in the reference if it is
    // identical to the base URI's scheme.
    if (!$this->getOption(self::OPTION_STRICT) && $reference->_scheme == $this->_scheme) {
        // Work on a copy so the caller's object keeps its scheme.
        $reference = clone $reference;
        $reference->_scheme = false;
    }

    $target = new self('');
    if ($reference->_scheme !== false) {
        // The reference is itself absolute: take everything from it.
        $target->_scheme = $reference->_scheme;
        $target->setAuthority($reference->getAuthority());
        $target->_path  = self::removeDotSegments($reference->_path);
        $target->_query = $reference->_query;
    } else {
        $authority = $reference->getAuthority();
        if ($authority !== false) {
            // Network-path reference ("//host/path")
            $target->setAuthority($authority);
            $target->_path  = self::removeDotSegments($reference->_path);
            $target->_query = $reference->_query;
        } else {
            if ($reference->_path == '') {
                // Same document; only the query may be replaced
                $target->_path = $this->_path;
                if ($reference->_query !== false) {
                    $target->_query = $reference->_query;
                } else {
                    $target->_query = $this->_query;
                }
            } else {
                if (substr($reference->_path, 0, 1) == '/') {
                    // Absolute-path reference
                    $target->_path = self::removeDotSegments($reference->_path);
                } else {
                    // Merge paths (RFC 3986, section 5.2.3)
                    if ($this->_host !== false && $this->_path == '') {
                        // Base has an authority and an empty path: the
                        // result is "/" followed by the reference path.
                        $target->_path = '/' . $reference->_path;
                    } else {
                        // Replace everything after the base path's last "/"
                        $i = strrpos($this->_path, '/');
                        if ($i !== false) {
                            $target->_path = substr($this->_path, 0, $i + 1);
                        }
                        $target->_path .= $reference->_path;
                    }
                    $target->_path = self::removeDotSegments($target->_path);
                }
                $target->_query = $reference->_query;
            }
            $target->setAuthority($this->getAuthority());
        }
        $target->_scheme = $this->_scheme;
    }

    $target->_fragment = $reference->_fragment;

    return $target;
}
/**
 * Removes dots as described in RFC 3986, section 5.2.4, e.g.
 * "/foo/../bar/baz" => "/bar/baz"
 *
 * The loop runs until the input buffer is the empty string. Every branch
 * shortens the buffer by at least one character, so it terminates without
 * an iteration cap, and a path such as "0" or one with very many segments
 * is processed completely.
 *
 * @param string $path a path
 *
 * @return string a path
 */
public static function removeDotSegments($path)
{
    $path   = (string) $path;
    $output = '';

    while ($path !== '') {
        if (strncmp($path, './', 2) === 0) {
            // Step 2.A: drop a leading "./"
            $path = (string) substr($path, 2);
        } elseif (strncmp($path, '../', 3) === 0) {
            // Step 2.A: drop a leading "../"
            $path = (string) substr($path, 3);
        } elseif (strncmp($path, '/./', 3) === 0 || $path === '/.') {
            // Step 2.B: "/./" or a final "/." becomes "/"
            $path = '/' . substr($path, 3);
        } elseif (strncmp($path, '/../', 4) === 0 || $path === '/..') {
            // Step 2.C: "/../" or a final "/.." becomes "/" and the last
            // segment already written to the output is removed
            $path   = '/' . substr($path, 4);
            $i      = strrpos($output, '/');
            $output = $i === false ? '' : (string) substr($output, 0, $i);
        } elseif ($path === '.' || $path === '..') {
            // Step 2.D: nothing but a dot segment is left
            $path = '';
        } else {
            // Step 2.E: move the first segment (including a leading "/",
            // excluding the next "/") to the output
            $i = strpos($path, '/');
            if ($i === 0) {
                $i = strpos($path, '/', 1);
            }
            if ($i === false) {
                $i = strlen($path);
            }
            $output .= substr($path, 0, $i);
            // Cast: on PHP < 8 substr() yields false once the buffer is
            // exhausted, which would never equal ''.
            $path = (string) substr($path, $i);
        }
    }

    return $output;
}
/**
 * Percent-encodes a string with rawurlencode() and then turns every "%7E"
 * back into "~", so that "~" is left unencoded regardless of how the
 * running PHP version's rawurlencode() treats it (PHP < 5.3 encodes it).
 *
 * @param string $string the string to encode
 *
 * @return string
 */
public static function urlencode($string)
{
    return str_replace('%7E', '~', rawurlencode($string));
}
/**
 * Returns a Net_URL2 instance representing the canonical URL of the
 * currently executing PHP script, built from $_SERVER['PHP_SELF'],
 * ['HTTPS'], ['SERVER_NAME'] and ['SERVER_PORT'].
 *
 * The port is only set when it differs from the scheme's default (80 for
 * http, 443 for https).
 *
 * @return Net_URL2
 * @throws Exception if $_SERVER['REQUEST_METHOD'] is not set
 */
public static function getCanonical()
{
    if (!isset($_SERVER['REQUEST_METHOD'])) {
        // ALERT - no current URL
        throw new Exception('Script was not called through a webserver');
    }

    // HTTPS is non-empty for HTTPS requests, but some servers (ISAPI with
    // IIS) set it to "off" for plain HTTP, so isset() alone is not enough.
    $isHttps = isset($_SERVER['HTTPS'])
        && $_SERVER['HTTPS'] !== ''
        && strtolower($_SERVER['HTTPS']) !== 'off';

    // Begin with a relative URL
    $url = new self($_SERVER['PHP_SELF']);
    $url->_scheme = $isHttps ? 'https' : 'http';
    $url->_host   = $_SERVER['SERVER_NAME'];
    $port = $_SERVER['SERVER_PORT'];
    if ($url->_scheme == 'http' && $port != 80 ||
        $url->_scheme == 'https' && $port != 443) {

        $url->_port = $port;
    }

    return $url;
}
/**
 * Gives the URL used to retrieve the current request, as a string.
 *
 * @return string
 * @see    self::getRequested()
 */
public static function getRequestedURL()
{
    $requested = self::getRequested();

    return $requested->getUrl();
}
/**
 * Returns a Net_URL2 instance representing the URL used to retrieve the
 * current request, built from $_SERVER['REQUEST_URI'], ['HTTPS'] and
 * ['HTTP_HOST'].
 *
 * @return Net_URL2
 * @throws Exception if $_SERVER['REQUEST_METHOD'] is not set
 */
public static function getRequested()
{
    if (!isset($_SERVER['REQUEST_METHOD'])) {
        // ALERT - no current URL
        throw new Exception('Script was not called through a webserver');
    }

    // HTTPS is non-empty for HTTPS requests, but some servers (ISAPI with
    // IIS) set it to "off" for plain HTTP, so isset() alone is not enough.
    $isHttps = isset($_SERVER['HTTPS'])
        && $_SERVER['HTTPS'] !== ''
        && strtolower($_SERVER['HTTPS']) !== 'off';

    // Begin with a relative URL
    $url = new self($_SERVER['REQUEST_URI']);
    $url->_scheme = $isHttps ? 'https' : 'http';
    // Set host and possibly port
    $url->setAuthority($_SERVER['HTTP_HOST']);

    return $url;
}
/**
 * Looks up the current value of an option.
 *
 * @param string $optionName The name of the option to retrieve
 *
 * @return mixed the option value, or false when the option is not set
 *               (isset() semantics, so a null value also yields false)
 */
public function getOption($optionName)
{
    if (isset($this->_options[$optionName])) {
        return $this->_options[$optionName];
    }

    return false;
}
/**
 * A userland counterpart of http_build_query(). Values are percent-encoded
 * with rawurlencode(); names are encoded only when OPTION_ENCODE_KEYS is
 * strictly true. Nested arrays are handled by recursion.
 *
 * @param array  $data      (name => value) array to turn into a query string
 * @param string $separator See {@link self::OPTION_SEPARATOR_OUTPUT}
 * @param string $key       Name prefix for nested values (arrays in an array)
 *
 * @return string
 */
protected function buildQuery(array $data, $separator, $key = null)
{
    $encodeKeys  = $this->getOption(self::OPTION_ENCODE_KEYS) === true;
    $useBrackets = $this->getOption(self::OPTION_USE_BRACKETS) === true;

    $pairs = array();
    foreach ($data as $name => $value) {
        if ($encodeKeys) {
            $name = rawurlencode($name);
        }

        if ($key !== null) {
            // Nested level: "parent[child]" with brackets, else just "parent"
            $name = $useBrackets ? $key . '[' . $name . ']' : $key;
        }

        $pairs[] = is_array($value)
            ? $this->buildQuery($value, $separator, $name)
            : $name . '=' . rawurlencode($value);
    }

    return implode($separator, $pairs);
}
/**
 * Splits a URL string into scheme, authority, path, query and fragment
 * using the regular expression from RFC 3986, appendix B, and stores the
 * result in this instance.
 *
 * @param string $url a URL
 *
 * @return void
 * @uses   self::$_scheme, self::setAuthority(), self::$_path, self::$_query,
 *         self::$_fragment
 * @see    self::__construct()
 */
protected function parseUrl($url)
{
    // The regular expression is copied verbatim from RFC 3986, appendix B.
    // The expression does not validate the URL but matches any string.
    $regex = '!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!';
    preg_match($regex, $url, $parts);

    // "path" is always present (possibly as an empty string); the rest
    // are optional. An optional part counts as present when the group that
    // includes its delimiter (":", "//", "?", "#") is non-empty.
    $hasScheme    = !empty($parts[1]);
    $hasAuthority = !empty($parts[3]);
    $hasQuery     = !empty($parts[6]);
    $hasFragment  = !empty($parts[8]);

    $this->_scheme = $hasScheme ? $parts[2] : false;
    $this->setAuthority($hasAuthority ? $parts[4] : false);
    $this->_path     = $parts[5];
    $this->_query    = $hasQuery ? $parts[7] : false;
    $this->_fragment = $hasFragment ? $parts[9] : false;
}