3 * Net_URL2, a class representing a URL as per RFC 3986.
9 * Copyright (c) 2007-2009, Peytz & Co. A/S
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * * Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * * Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in
20 * the documentation and/or other materials provided with the distribution.
21 * * Neither the name of the PHP_LexerGenerator nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
26 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
27 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
33 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * @category Networking
39 * @author Christian Schmidt <chsc@peytz.dk>
40 * @copyright 2007-2008 Peytz & Co. A/S
41 * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
42 * @version CVS: $Id: URL2.php 286661 2009-08-02 12:50:54Z schmidt $
43 * @link http://www.rfc-editor.org/rfc/rfc3986.txt
47 * Represents a URL as per RFC 3986.
49 * @category Networking
51 * @author Christian Schmidt <chsc@peytz.dk>
52 * @copyright 2007-2008 Peytz & Co. ApS
53 * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
54 * @version Release: @package_version@
55 * @link http://pear.php.net/package/Net_URL2
60 * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default
63 const OPTION_STRICT = 'strict';
66 * Represent arrays in query using PHP's [] notation. Default is true.
68 const OPTION_USE_BRACKETS = 'use_brackets';
71 * URL-encode query variable keys. Default is true.
73 const OPTION_ENCODE_KEYS = 'encode_keys';
76 * Query variable separators when parsing the query string. Every character
77 * is considered a separator. Default is specified by the
78 * arg_separator.input php.ini setting (this defaults to "&").
80 const OPTION_SEPARATOR_INPUT = 'input_separator';
83 * Query variable separator used when generating the query string. Default
84 * is specified by the arg_separator.output php.ini setting (this defaults
87 const OPTION_SEPARATOR_OUTPUT = 'output_separator';
90 * Default options correspond to how PHP handles $_GET.
92 private $_options = array(
93 self::OPTION_STRICT => true,
94 self::OPTION_USE_BRACKETS => true,
95 self::OPTION_ENCODE_KEYS => true,
96 self::OPTION_SEPARATOR_INPUT => 'x&',
97 self::OPTION_SEPARATOR_OUTPUT => 'x&',
103 private $_scheme = false;
108 private $_userinfo = false;
113 private $_host = false;
118 private $_port = false;
128 private $_query = false;
133 private $_fragment = false;
138 * @param string $url an absolute or relative URL
139 * @param array $options an array of OPTION_xxx constants
141 public function __construct($url, $options = null)
143 $this->setOption(self::OPTION_SEPARATOR_INPUT,
144 ini_get('arg_separator.input'));
145 $this->setOption(self::OPTION_SEPARATOR_OUTPUT,
146 ini_get('arg_separator.output'));
147 if (is_array($options)) {
148 foreach ($options as $optionName => $value) {
149 $this->setOption($optionName, $value);
153 if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) {
154 $this->_scheme = $reg[1];
155 $url = substr($url, strlen($reg[0]));
158 if (preg_match('@^//([^/#?]+)@', $url, $reg)) {
159 $this->setAuthority($reg[1]);
160 $url = substr($url, strlen($reg[0]));
163 $i = strcspn($url, '?#');
164 $this->_path = substr($url, 0, $i);
165 $url = substr($url, $i);
167 if (preg_match('@^\?([^#]*)@', $url, $reg)) {
168 $this->_query = $reg[1];
169 $url = substr($url, strlen($reg[0]));
173 $this->_fragment = substr($url, 1);
180 * This method will magically set the value of a private variable ($var)
181 * with the value passed as the args
183 * @param string $var The private variable to set.
184 * @param mixed $arg An argument of any type.
/**
 * Magic setter: routes an assignment to the matching setter method.
 *
 * The method name is built as 'set' . $var. If this object has such a
 * method it is called with $arg; otherwise the assignment is silently
 * ignored.
 *
 * @param string $var The name appended to "set" to find the setter.
 * @param mixed  $arg The value handed to the setter.
 *
 * @return void
 */
public function __set($var, $arg)
{
    $setter = 'set' . $var;

    // Nothing to do when no matching setter exists.
    if (!method_exists($this, $setter)) {
        return;
    }

    $this->$setter($arg);
}
198 * This is the magic get method to retrieve the private variable
199 * that was set by either __set() or its setter...
201 * @param string $var The property name to retrieve.
202 * @return mixed $this->$var Either a boolean false if the
203 * property is not set or the value
204 * of the private property.
206 public function __get($var)
208 $method = 'get' . $var;
209 if (method_exists($this, $method)) {
210 return $this->$method();
217 * Returns the scheme, e.g. "http" or "urn", or false if there is no
218 * scheme specified, i.e. if this is a relative URL.
220 * @return string|bool
/**
 * Returns the stored scheme, e.g. "http" or "urn".
 *
 * The scheme property starts out as false, which is what is returned
 * when no scheme has been set (a relative URL).
 *
 * @return string|bool the scheme, or false if there is none
 */
public function getScheme()
{
    return $this->_scheme;
}
228 * Sets the scheme, e.g. "http" or "urn". Specify false if there is no
229 * scheme specified, i.e. if this is a relative URL.
231 * @param string|bool $scheme e.g. "http" or "urn", or false if there is no
232 * scheme specified, i.e. if this is a relative
/**
 * Stores the scheme, e.g. "http" or "urn".
 *
 * The value is stored as given; no validation or case normalization is
 * done here. Pass false to mark the URL as having no scheme.
 *
 * @param string|bool $scheme the scheme, or false for a relative URL
 *
 * @return void
 */
public function setScheme($scheme)
{
    $this->_scheme = $scheme;
}
244 * Returns the user part of the userinfo part (the part preceding the first
245 * ":"), or false if there is no userinfo part.
247 * @return string|bool
249 public function getUser()
251 return $this->_userinfo !== false
252 ? preg_replace('@:.*$@', '', $this->_userinfo)
257 * Returns the password part of the userinfo part (the part after the first
258 * ":"), or false if there is no userinfo part (i.e. the URL does not
259 * contain "@" in front of the hostname) or the userinfo part does not
262 * @return string|bool
264 public function getPassword()
266 return $this->_userinfo !== false
267 ? substr(strstr($this->_userinfo, ':'), 1)
272 * Returns the userinfo part, or false if there is none, i.e. if the
273 * authority part does not contain "@".
275 * @return string|bool
/**
 * Returns the stored userinfo part.
 *
 * The userinfo property starts out as false, which is what is returned
 * when no userinfo has been set.
 *
 * @return string|bool the userinfo, or false if there is none
 */
public function getUserinfo()
{
    return $this->_userinfo;
}
283 * Sets the userinfo part. If two arguments are passed, they are combined
284 * in the userinfo part as username ":" password.
286 * @param string|bool $userinfo userinfo or username
287 * @param string|bool $password optional password, or false
/**
 * Stores the userinfo part.
 *
 * When $password is anything other than false, the stored value is
 * $userinfo followed by ":" and the password; otherwise $userinfo is
 * stored unchanged.
 *
 * @param string|bool $userinfo userinfo or username
 * @param string|bool $password optional password, or false for none
 *
 * @return void
 */
public function setUserinfo($userinfo, $password = false)
{
    $combined = $userinfo;

    if (false !== $password) {
        $combined .= ':' . $password;
    }

    $this->_userinfo = $combined;
}
300 * Returns the host part, or false if there is no authority part, e.g.
303 * @return string|bool a hostname, an IP address, or false
305 public function getHost()
311 * Sets the host part. Specify false if there is no authority part, e.g.
314 * @param string|bool $host a hostname, an IP address, or false
/**
 * Stores the host part.
 *
 * The value is stored as given, without validation. Pass false to
 * indicate that there is no authority part.
 *
 * @param string|bool $host a hostname, an IP address, or false
 *
 * @return void
 */
public function setHost($host)
{
    $this->_host = $host;
}
324 * Returns the port number, or false if there is no port number specified,
325 * i.e. if the default port is to be used.
329 public function getPort()
335 * Sets the port number. Specify false if there is no port number specified,
336 * i.e. if the default port is to be used.
338 * @param int|bool $port a port number, or false
/**
 * Stores the port number.
 *
 * Passing false records "no port specified", matching the property's
 * initial value. Any other value is converted to an integer with
 * intval().
 *
 * @param int|bool $port a port number, or false if the default port is
 *                       to be used
 *
 * @return void
 */
public function setPort($port)
{
    // intval(false) is 0. Storing that would turn "no port" into an
    // explicit port 0, which getAuthority() would then emit as ":0"
    // because it only skips the port when it is exactly false.
    $this->_port = ($port === false) ? false : intval($port);
}
348 * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or
349 * false if there is no authority.
351 * @return string|bool
353 public function getAuthority()
361 if ($this->_userinfo !== false) {
362 $authority .= $this->_userinfo . '@';
365 $authority .= $this->_host;
367 if ($this->_port !== false) {
368 $authority .= ':' . $this->_port;
375 * Sets the authority part, i.e. [ userinfo "@" ] host [ ":" port ]. Specify
376 * false if there is no authority.
378 * @param string|false $authority a hostname or an IP address, possibly
379 * with userinfo prefixed and port number
380 * appended, e.g. "foo:bar@example.org:81".
384 public function setAuthority($authority)
386 $this->_userinfo = false;
387 $this->_host = false;
388 $this->_port = false;
389 if (preg_match('@^(([^\@]*)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) {
391 $this->_userinfo = $reg[2];
394 $this->_host = $reg[3];
395 if (isset($reg[5])) {
396 $this->_port = intval($reg[5]);
402 * Returns the path part (possibly an empty string).
406 public function getPath()
412 * Sets the path part (possibly an empty string).
414 * @param string $path a path
/**
 * Stores the path part (possibly an empty string).
 *
 * The value is stored as given; dot segments are not removed here.
 *
 * @param string $path a path
 *
 * @return void
 */
public function setPath($path)
{
    $this->_path = $path;
}
424 * Returns the query string (excluding the leading "?"), or false if "?"
425 * is not present in the URL.
427 * @return string|bool
428 * @see self::getQueryVariables()
/**
 * Returns the stored query string, without the leading "?".
 *
 * The query property starts out as false, which is what is returned
 * when the URL has no query.
 *
 * @return string|bool the query string, or false if there is none
 */
public function getQuery()
{
    return $this->_query;
}
436 * Sets the query string (excluding the leading "?"). Specify false if "?"
437 * is not present in the URL.
439 * @param string|bool $query a query string, e.g. "foo=1&bar=2"
442 * @see self::setQueryVariables()
/**
 * Stores the query string, without the leading "?".
 *
 * The value is stored as given, without parsing or encoding. Pass false
 * to indicate that the URL has no query.
 *
 * @param string|bool $query a query string, e.g. "foo=1&bar=2", or false
 *
 * @return void
 */
public function setQuery($query)
{
    $this->_query = $query;
}
450 * Returns the fragment name, or false if "#" is not present in the URL.
452 * @return string|bool
/**
 * Returns the stored fragment, without the leading "#".
 *
 * The fragment property starts out as false, which is what is returned
 * when the URL has no fragment.
 *
 * @return string|bool the fragment, or false if there is none
 */
public function getFragment()
{
    return $this->_fragment;
}
460 * Sets the fragment name. Specify false if "#" is not present in the URL.
462 * @param string|bool $fragment a fragment excluding the leading "#", or
/**
 * Stores the fragment, without the leading "#".
 *
 * The value is stored as given. Pass false to indicate that the URL has
 * no fragment.
 *
 * @param string|bool $fragment a fragment excluding the leading "#", or
 *                              false
 *
 * @return void
 */
public function setFragment($fragment)
{
    $this->_fragment = $fragment;
}
473 * Returns the query string like an array as the variables would appear in
474 * $_GET in a PHP script. If the URL does not contain a "?", an empty array
479 public function getQueryVariables()
482 preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') .
484 $parts = preg_split($pattern, $this->_query, -1, PREG_SPLIT_NO_EMPTY);
487 foreach ($parts as $part) {
488 if (strpos($part, '=') !== false) {
489 list($key, $value) = explode('=', $part, 2);
495 if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
496 $key = rawurldecode($key);
498 $value = rawurldecode($value);
500 if ($this->getOption(self::OPTION_USE_BRACKETS) &&
501 preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) {
506 // Ensure is an array
507 if (empty($return[$key]) || !is_array($return[$key])) {
508 $return[$key] = array();
513 $return[$key][] = $value;
515 $return[$key][$idx] = $value;
517 } elseif (!$this->getOption(self::OPTION_USE_BRACKETS)
518 && !empty($return[$key])
520 $return[$key] = (array) $return[$key];
521 $return[$key][] = $value;
523 $return[$key] = $value;
531 * Sets the query string from the specified array of variables.
533 * @param array $array (name => value) array
537 public function setQueryVariables(array $array)
540 $this->_query = false;
542 foreach ($array as $name => $value) {
543 if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
544 $name = self::urlencode($name);
547 if (is_array($value)) {
548 foreach ($value as $k => $v) {
549 $parts[] = $this->getOption(self::OPTION_USE_BRACKETS)
550 ? sprintf('%s[%s]=%s', $name, $k, $v)
551 : ($name . '=' . $v);
553 } elseif (!is_null($value)) {
554 $parts[] = $name . '=' . self::urlencode($value);
559 $this->_query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT),
565 * Sets the specified variable in the query string.
567 * @param string $name variable name
568 * @param mixed $value variable value
/**
 * Sets a single variable in the query string.
 *
 * Reads the current variables with getQueryVariables(), assigns $value
 * under $name (replacing any existing entry with that key), and writes
 * the whole set back with setQueryVariables().
 *
 * @param string $name  variable name
 * @param mixed  $value variable value
 *
 * @return void
 */
public function setQueryVariable($name, $value)
{
    $variables        = $this->getQueryVariables();
    $variables[$name] = $value;

    $this->setQueryVariables($variables);
}
580 * Removes the specified variable from the query string.
582 * @param string $name a query string variable, e.g. "foo" in "?foo=1"
/**
 * Removes a single variable from the query string.
 *
 * Reads the current variables with getQueryVariables(), drops the entry
 * keyed by $name (a no-op if it is absent), and writes the remaining
 * variables back with setQueryVariables().
 *
 * @param string $name a query string variable, e.g. "foo" in "?foo=1"
 *
 * @return void
 */
public function unsetQueryVariable($name)
{
    $variables = $this->getQueryVariables();
    unset($variables[$name]);

    $this->setQueryVariables($variables);
}
594 * Returns a string representation of this URL.
598 public function getURL()
600 // See RFC 3986, section 5.3
603 if ($this->_scheme !== false) {
604 $url .= $this->_scheme . ':';
607 $authority = $this->getAuthority();
608 if ($authority !== false) {
609 $url .= '//' . $authority;
611 $url .= $this->_path;
613 if ($this->_query !== false) {
614 $url .= '?' . $this->_query;
617 if ($this->_fragment !== false) {
618 $url .= '#' . $this->_fragment;
625 * Returns a string representation of this URL.
/**
 * Returns a string representation of this URL.
 *
 * Delegates to getURL(), so casting the object to a string yields the
 * same value as calling getURL() directly.
 *
 * @return string
 */
public function __toString()
{
    $url = $this->getURL();

    return $url;
}
636 * Returns a normalized string representation of this URL. This is useful
637 * for comparison of URLs.
641 public function getNormalizedURL()
645 return $url->getUrl();
649 * Returns a normalized Net_URL2 instance.
653 public function normalize()
655 // See RFC 3986, section 6
657 // Schemes are case-insensitive
658 if ($this->_scheme) {
659 $this->_scheme = strtolower($this->_scheme);
662 // Hostnames are case-insensitive
664 $this->_host = strtolower($this->_host);
667 // Remove default port number for known schemes (RFC 3986, section 6.2.3)
670 $this->_port == getservbyname($this->_scheme, 'tcp')) {
672 $this->_port = false;
675 // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1)
676 foreach (array('_userinfo', '_host', '_path') as $part) {
678 $this->$part = preg_replace('/%[0-9a-f]{2}/ie',
684 // Path segment normalization (RFC 3986, section 6.2.2.3)
685 $this->_path = self::removeDotSegments($this->_path);
687 // Scheme based normalization (RFC 3986, section 6.2.3)
688 if ($this->_host && !$this->_path) {
694 * Returns whether this instance represents an absolute URL.
/**
 * Returns whether this instance represents an absolute URL.
 *
 * The URL counts as absolute when the stored scheme is truthy; a scheme
 * of false (the initial value) or an empty string yields false.
 *
 * @return bool
 */
public function isAbsolute()
{
    return $this->_scheme ? true : false;
}
704 * Returns a Net_URL2 instance representing an absolute URL relative to
707 * @param Net_URL2|string $reference relative URL
711 public function resolve($reference)
713 if (!$reference instanceof Net_URL2) {
714 $reference = new self($reference);
716 if (!$this->isAbsolute()) {
717 throw new Exception('Base-URL must be absolute');
720 // A non-strict parser may ignore a scheme in the reference if it is
721 // identical to the base URI's scheme.
722 if (!$this->getOption(self::OPTION_STRICT) && $reference->_scheme == $this->_scheme) {
723 $reference->_scheme = false;
726 $target = new self('');
727 if ($reference->_scheme !== false) {
728 $target->_scheme = $reference->_scheme;
729 $target->setAuthority($reference->getAuthority());
730 $target->_path = self::removeDotSegments($reference->_path);
731 $target->_query = $reference->_query;
733 $authority = $reference->getAuthority();
734 if ($authority !== false) {
735 $target->setAuthority($authority);
736 $target->_path = self::removeDotSegments($reference->_path);
737 $target->_query = $reference->_query;
739 if ($reference->_path == '') {
740 $target->_path = $this->_path;
741 if ($reference->_query !== false) {
742 $target->_query = $reference->_query;
744 $target->_query = $this->_query;
747 if (substr($reference->_path, 0, 1) == '/') {
748 $target->_path = self::removeDotSegments($reference->_path);
750 // Merge paths (RFC 3986, section 5.2.3)
751 if ($this->_host !== false && $this->_path == '') {
752 $target->_path = '/' . $this->_path;
754 $i = strrpos($this->_path, '/');
756 $target->_path = substr($this->_path, 0, $i + 1);
758 $target->_path .= $reference->_path;
760 $target->_path = self::removeDotSegments($target->_path);
762 $target->_query = $reference->_query;
764 $target->setAuthority($this->getAuthority());
766 $target->_scheme = $this->_scheme;
769 $target->_fragment = $reference->_fragment;
775 * Removes dots as described in RFC 3986, section 5.2.4, e.g.
776 * "/foo/../bar/baz" => "/bar/baz"
778 * @param string $path a path
780 * @return string a path
782 public static function removeDotSegments($path)
786 // Make sure not to be trapped in an infinite loop due to a bug in this
789 while ($path && $j++ < 100) {
790 if (substr($path, 0, 2) == './') {
792 $path = substr($path, 2);
793 } elseif (substr($path, 0, 3) == '../') {
795 $path = substr($path, 3);
796 } elseif (substr($path, 0, 3) == '/./' || $path == '/.') {
798 $path = '/' . substr($path, 3);
799 } elseif (substr($path, 0, 4) == '/../' || $path == '/..') {
801 $path = '/' . substr($path, 4);
802 $i = strrpos($output, '/');
803 $output = $i === false ? '' : substr($output, 0, $i);
804 } elseif ($path == '.' || $path == '..') {
809 $i = strpos($path, '/');
811 $i = strpos($path, '/', 1);
816 $output .= substr($path, 0, $i);
817 $path = substr($path, $i);
825 * Percent-encodes all non-alphanumeric characters except these: _ . - ~
826 * Similar to PHP's rawurlencode(), except that it also encodes ~ in PHP
829 * @param string $string the string to encode
832 public static function urlencode($string)
834 $encoded = rawurlencode($string);
835 // This is only necessary in PHP < 5.3.
836 $encoded = str_replace('%7E', '~', $encoded);
841 * Returns a Net_URL2 instance representing the canonical URL of the
842 * currently executing PHP script.
846 public static function getCanonical()
848 if (!isset($_SERVER['REQUEST_METHOD'])) {
849 // ALERT - no current URL
850 throw new Exception('Script was not called through a webserver');
853 // Begin with a relative URL
854 $url = new self($_SERVER['PHP_SELF']);
855 $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
856 $url->_host = $_SERVER['SERVER_NAME'];
857 $port = intval($_SERVER['SERVER_PORT']);
858 if ($url->_scheme == 'http' && $port != 80 ||
859 $url->_scheme == 'https' && $port != 443) {
867 * Returns the URL used to retrieve the current request.
/**
 * Returns the URL used to retrieve the current request, as a string.
 *
 * Obtains the instance from getRequested() and returns its getUrl()
 * value; anything thrown by getRequested() propagates to the caller.
 *
 * @return string
 */
public static function getRequestedURL()
{
    $requested = self::getRequested();

    return $requested->getUrl();
}
877 * Returns a Net_URL2 instance representing the URL used to retrieve the
882 public static function getRequested()
884 if (!isset($_SERVER['REQUEST_METHOD'])) {
885 // ALERT - no current URL
886 throw new Exception('Script was not called through a webserver');
889 // Begin with a relative URL
890 $url = new self($_SERVER['REQUEST_URI']);
891 $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
892 // Set host and possibly port
893 $url->setAuthority($_SERVER['HTTP_HOST']);
898 * Sets the specified option.
900 * @param string $optionName a self::OPTION_ constant
901 * @param mixed $value option value
904 * @see self::OPTION_STRICT
905 * @see self::OPTION_USE_BRACKETS
906 * @see self::OPTION_ENCODE_KEYS
908 function setOption($optionName, $value)
910 if (!array_key_exists($optionName, $this->_options)) {
913 $this->_options[$optionName] = $value;
917 * Returns the value of the specified option.
919 * @param string $optionName The name of the option to retrieve
/**
 * Returns the value of the specified option.
 *
 * Visibility is declared explicitly to match the other methods of this
 * class; a method without a visibility keyword is public anyway, so
 * callers are unaffected.
 *
 * @param string $optionName The name of the option to retrieve
 *
 * @return mixed the stored option value, or false when the option is not
 *               set (isset() also treats a stored null as not set)
 */
public function getOption($optionName)
{
    if (isset($this->_options[$optionName])) {
        return $this->_options[$optionName];
    }

    return false;
}