2 // +-----------------------------------------------------------------------+
3 // | Copyright (c) 2007-2008, Christian Schmidt, Peytz & Co. A/S |
4 // | All rights reserved. |
6 // | Redistribution and use in source and binary forms, with or without |
7 // | modification, are permitted provided that the following conditions |
10 // | o Redistributions of source code must retain the above copyright |
11 // | notice, this list of conditions and the following disclaimer. |
12 // | o Redistributions in binary form must reproduce the above copyright |
13 // | notice, this list of conditions and the following disclaimer in the |
14 // | documentation and/or other materials provided with the distribution.|
15 // | o The names of the authors may not be used to endorse or promote |
16 // | products derived from this software without specific prior written |
19 // | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 // | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 // | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 // | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 // | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 // | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 // | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 // | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 // | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 // | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 // | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 // +-----------------------------------------------------------------------+
32 // | Author: Christian Schmidt <schmidt at php dot net> |
33 // +-----------------------------------------------------------------------+
35 // $Id: URL2.php,v 1.10 2008/04/26 21:57:08 schmidt Exp $
37 // Net_URL2 Class (PHP5 Only)
39 // This code is released under the BSD License - http://www.opensource.org/licenses/bsd-license.php
41 * @license BSD License
46 * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default
/**
 * Option key: do strict parsing in resolve() (see RFC 3986, section 5.2.2).
 */
const OPTION_STRICT = 'strict';

/**
 * Option key: represent arrays in the query using PHP's [] notation.
 * Default is true.
 */
const OPTION_USE_BRACKETS = 'use_brackets';

/**
 * Option key: URL-encode query variable keys. Default is true.
 */
const OPTION_ENCODE_KEYS = 'encode_keys';

/**
 * Option key: query variable separators used when parsing the query string.
 * Every character is considered a separator. Default is specified by the
 * arg_separator.input php.ini setting (this defaults to "&").
 */
const OPTION_SEPARATOR_INPUT = 'input_separator';

/**
 * Option key: query variable separator used when generating the query
 * string. Default is specified by the arg_separator.output php.ini setting.
 */
const OPTION_SEPARATOR_OUTPUT = 'output_separator';
76 * Default options correspond to how PHP handles $_GET.
/**
 * Option values of this instance, keyed by the self::OPTION_* constants.
 *
 * The two separator entries are replaced by the constructor with the
 * arg_separator.input / arg_separator.output php.ini settings; "&" is the
 * fallback declared here (it used to be the meaningless string "x&").
 *
 * @var array
 */
private $options = array(
    self::OPTION_STRICT           => true,
    self::OPTION_USE_BRACKETS     => true,
    self::OPTION_ENCODE_KEYS      => true,
    self::OPTION_SEPARATOR_INPUT  => '&',
    self::OPTION_SEPARATOR_OUTPUT => '&',
);

/**
 * @var string|bool scheme, or false if none is set
 */
private $scheme = false;

/**
 * @var string|bool userinfo part, or false if none is set
 */
private $userinfo = false;

/**
 * @var string|bool host part, or false if none is set
 */
private $host = false;

/**
 * @var int|bool port number, or false if none is set
 */
private $port = false;

/**
 * @var string|bool query string without the leading "?", or false if none
 */
private $query = false;

/**
 * @var string|bool fragment without the leading "#", or false if none
 */
private $fragment = false;
122 * @param string $url an absolute or relative URL
123 * @param array $options
/**
 * Splits the given URL into scheme, authority, path, query and fragment.
 *
 * @param string $url     an absolute or relative URL
 * @param array  $options option values keyed by the self::OPTION_* constants
 */
public function __construct($url, $options = null)
{
    // Start from the php.ini separators; explicit options below override them.
    $this->setOption(self::OPTION_SEPARATOR_INPUT,
                     ini_get('arg_separator.input'));
    $this->setOption(self::OPTION_SEPARATOR_OUTPUT,
                     ini_get('arg_separator.output'));
    if (is_array($options)) {
        foreach ($options as $optionName => $value) {
            // The value has to be forwarded: setOption() takes two arguments
            // and the option would otherwise never be applied.
            $this->setOption($optionName, $value);
        }
    }

    // Scheme: a leading "name:" prefix.
    if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) {
        $this->scheme = $reg[1];
        $url = (string) substr($url, strlen($reg[0]));
    }

    // Authority: everything between "//" and the next "/", "#" or "?".
    if (preg_match('@^//([^/#?]+)@', $url, $reg)) {
        $this->setAuthority($reg[1]);
        $url = (string) substr($url, strlen($reg[0]));
    }

    // Path: up to the first "?" or "#". The casts keep the components strings
    // on PHP versions where substr() returns false for an empty result.
    $i = strcspn($url, '?#');
    $this->path = (string) substr($url, 0, $i);
    $url = (string) substr($url, $i);

    // Query: between "?" and "#".
    if (preg_match('@^\?([^#]*)@', $url, $reg)) {
        $this->query = $reg[1];
        $url = (string) substr($url, strlen($reg[0]));
    }

    // Fragment: whatever is left starts with "#" - or nothing is left, in
    // which case the fragment stays false.
    if (substr($url, 0, 1) === '#') {
        $this->fragment = (string) substr($url, 1);
    }
}
162 * Returns the scheme, e.g. "http" or "urn", or false if there is no
163 * scheme specified, i.e. if this is a relative URL.
165 * @return string|bool
/**
 * Gets the scheme of this URL, e.g. "http" or "urn".
 *
 * @return string|bool the scheme, or false for a relative URL without one
 */
public function getScheme()
{
    $scheme = $this->scheme;

    return $scheme;
}
173 * @param string|bool $scheme
/**
 * Stores the scheme of this URL.
 *
 * @param string|bool $scheme the new scheme; it is stored as given
 */
public function setScheme($scheme)
{
    $this->scheme = $scheme;
}
184 * Returns the user part of the userinfo part (the part preceding the first
185 * ":"), or false if there is no userinfo part.
187 * @return string|bool
/**
 * Gets the user name, i.e. the userinfo part with everything from the first
 * ":" onwards stripped.
 *
 * @return string|bool the user name, or false if there is no userinfo part
 */
public function getUser()
{
    if ($this->userinfo === false) {
        return false;
    }

    return preg_replace('@:.*$@', '', $this->userinfo);
}
195 * Returns the password part of the userinfo part (the part after the first
196 * ":"), or false if there is no userinfo part (i.e. the URL does not
197 * contain "@" in front of the hostname) or the userinfo part does not
200 * @return string|bool
/**
 * Returns the password part of the userinfo part (the part after the first
 * ":").
 *
 * The absence of ":" is checked explicitly. Relying on
 * substr(strstr(...), 1) returns "" rather than false on PHP 8 when the
 * userinfo contains no ":".
 *
 * @return string|bool the password (possibly an empty string), or false if
 *                     there is no userinfo part or it contains no ":"
 */
public function getPassword()
{
    if ($this->userinfo === false) {
        return false;
    }

    $colon = strpos($this->userinfo, ':');
    if ($colon === false) {
        return false;
    }

    return (string) substr($this->userinfo, $colon + 1);
}
208 * Returns the userinfo part, or false if there is none, i.e. if the
209 * authority part does not contain "@".
211 * @return string|bool
/**
 * Gets the raw userinfo part of the authority.
 *
 * @return string|bool the userinfo part, or false if there is none
 */
public function getUserinfo()
{
    $userinfo = $this->userinfo;

    return $userinfo;
}
219 * Sets the userinfo part. If two arguments are passed, they are combined
220 * in the userinfo part as username ":" password.
222 * @param string|bool $userinfo userinfo or username
223 * @param string|bool $password
/**
 * Stores the userinfo part. With a second argument the two values are joined
 * as username ":" password.
 *
 * @param string|bool $userinfo userinfo or username
 * @param string|bool $password password, or false to store $userinfo as is
 */
public function setUserinfo($userinfo, $password = false)
{
    if ($password === false) {
        $this->userinfo = $userinfo;
        return;
    }

    $this->userinfo = $userinfo . ':' . $password;
}
236 * Returns the host part, or false if there is no authority part, e.g.
239 * @return string|bool
241 public function getHost()
247 * @param string|bool $host
251 public function setHost($host)
257 * Returns the port number, or false if there is no port number specified,
258 * i.e. if the default port is to be used.
262 public function getPort()
268 * @param int|bool $port
/**
 * Sets the port number.
 *
 * False means "no port" and is stored unchanged. Passing it through intval()
 * would store 0, which is then emitted as ":0" in the authority.
 *
 * @param int|bool $port port number, or false to use the default port
 */
public function setPort($port)
{
    $this->port = ($port === false) ? false : intval($port);
}
278 * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or
279 * false if there is no authority.
281 * @return string|bool
// Assembles the authority string "[ userinfo "@" ] host [ ":" port ]" from
// the stored components.
// NOTE(review): the statements that initialise $authority, handle a missing
// authority and return the result are not visible in this excerpt.
283 public function getAuthority()
// The userinfo and its "@" delimiter are only emitted when userinfo is set.
291 if ($this->userinfo !== false) {
292 $authority .= $this->userinfo . '@';
// The host is appended unconditionally at this point.
295 $authority .= $this->host;
// A ":" and the port follow only when a port has been stored.
297 if ($this->port !== false) {
298 $authority .= ':' . $this->port;
305 * @param string|false $authority
// Parses an authority string of the form [ userinfo "@" ] host [ ":" port ]
// and stores the pieces in $this->userinfo, $this->host and $this->port.
// NOTE(review): any statements preceding the preg_match() (e.g. a reset of the
// previous values) are not visible in this excerpt.
309 public function setAuthority($authority)
// Capture groups: 2 = userinfo (text before the first "@"), 3 = host (may not
// contain ":"), 5 = the digits after ":". Input that does not match the
// pattern is ignored by the visible code.
315 if (preg_match('@^(([^\@]+)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) {
317 $this->userinfo = $reg[2];
320 $this->host = $reg[3];
// NOTE(review): the pattern allows an empty digit run ("host:"); presumably
// group 5 is then an empty string and intval() stores 0 - confirm.
321 if (isset($reg[5])) {
322 $this->port = intval($reg[5]);
328 * Returns the path part (possibly an empty string).
332 public function getPath()
338 * @param string $path
342 public function setPath($path)
348 * Returns the query string (excluding the leading "?"), or false if "?"
349 * isn't present in the URL.
351 * @return string|bool
352 * @see self::getQueryVariables()
354 public function getQuery()
360 * @param string|bool $query
363 * @see self::setQueryVariables()
/**
 * Stores the raw query string; the value is stored exactly as given.
 *
 * @param string|bool $query the new query string
 *
 * @see self::setQueryVariables()
 */
public function setQuery($query)
{
    $this->query = $query;
}
371 * Returns the fragment name, or false if "#" isn't present in the URL.
373 * @return string|bool
/**
 * Gets the fragment name.
 *
 * @return string|bool the fragment, or false if "#" isn't present in the URL
 */
public function getFragment()
{
    $fragment = $this->fragment;

    return $fragment;
}
381 * @param string|bool $fragment
/**
 * Stores the fragment name; the value is stored exactly as given.
 *
 * @param string|bool $fragment the new fragment
 */
public function setFragment($fragment)
{
    $this->fragment = $fragment;
}
391 * Returns the query string like an array as the variables would appear in
392 * $_GET in a PHP script.
// Parses the stored query string into a (name => value) array, shaped like the
// variables would appear in $_GET.
// NOTE(review): several statements of this method (among them the
// initialisation of $pattern, $return and $idx and the return statement) are
// not visible in this excerpt.
396 public function getQueryVariables()
// The configured input separators are escaped with preg_quote() so that they
// are matched literally inside the split pattern.
399 preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') .
// PREG_SPLIT_NO_EMPTY discards empty pieces, e.g. between two adjacent
// separators.
401 $parts = preg_split($pattern, $this->query, -1, PREG_SPLIT_NO_EMPTY);
404 foreach ($parts as $part) {
405 if (strpos($part, '=') !== false) {
// Split on the first "=" only, so the value itself may contain "=".
406 list($key, $value) = explode('=', $part, 2);
// Keys are percent-decoded only when OPTION_ENCODE_KEYS is enabled; values are
// always percent-decoded. rawurldecode() leaves "+" untouched.
412 if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
413 $key = rawurldecode($key);
415 $value = rawurldecode($value);
// With OPTION_USE_BRACKETS a key of the form name[idx] or name[] is collected
// into an array.
417 if ($this->getOption(self::OPTION_USE_BRACKETS) &&
418 preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) {
423 // Ensure is an array
424 if (empty($return[$key]) || !is_array($return[$key])) {
425 $return[$key] = array();
// Two ways of storing the value: appended to the array, or stored under the
// explicit index $idx.
430 $return[$key][] = $value;
432 $return[$key][$idx] = $value;
// Without bracket notation, a key that already has a non-empty entry is turned
// into a list and the new value is appended.
434 } elseif (!$this->getOption(self::OPTION_USE_BRACKETS)
435 && !empty($return[$key])
437 $return[$key] = (array) $return[$key];
438 $return[$key][] = $value;
// Plain assignment of the value under its key.
440 $return[$key] = $value;
448 * @param array $array (name => value) array
// Rebuilds the query string from a (name => value) array.
// NOTE(review): some statements of this method (e.g. the condition guarding
// the "query = false" assignment and the end of the implode() call) are not
// visible in this excerpt.
452 public function setQueryVariables(array $array)
455 $this->query = false;
457 foreach ($array as $name => $value) {
// Names are percent-encoded only when OPTION_ENCODE_KEYS is enabled.
458 if ($this->getOption(self::OPTION_ENCODE_KEYS)) {
459 $name = rawurlencode($name);
// Array values produce one "name[k]=v" pair per element with
// OPTION_USE_BRACKETS, otherwise repeated "name=v" pairs.
// NOTE(review): $k, $v and $value are inserted as given - the visible code
// does not percent-encode them.
462 if (is_array($value)) {
463 foreach ($value as $k => $v) {
464 $parts[] = $this->getOption(self::OPTION_USE_BRACKETS)
465 ? sprintf('%s[%s]=%s', $name, $k, $v)
466 : ($name . '=' . $v);
468 } elseif (!is_null($value)) {
469 $parts[] = $name . '=' . $value;
// The collected pairs are joined with the configured output separator.
474 $this->query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT),
480 * @param string $name
481 * @param mixed $value
/**
 * Sets a single query variable by decoding the current query string,
 * assigning the entry and rebuilding the query string.
 *
 * @param string $name  name of the query variable
 * @param mixed  $value value to store under that name
 */
public function setQueryVariable($name, $value)
{
    $variables = $this->getQueryVariables();
    $variables[$name] = $value;

    $this->setQueryVariables($variables);
}
493 * @param string $name
/**
 * Removes a single query variable by decoding the current query string,
 * dropping the entry and rebuilding the query string.
 *
 * @param string $name name of the query variable to remove
 */
public function unsetQueryVariable($name)
{
    $variables = $this->getQueryVariables();
    unset($variables[$name]);

    $this->setQueryVariables($variables);
}
505 * Returns a string representation of this URL.
// Recomposes the URL string from the stored components.
// NOTE(review): the statements that initialise $url, add the path and return
// the result are not visible in this excerpt.
509 public function getURL()
511 // See RFC 3986, section 5.3
// Each optional component is emitted together with its delimiter, and only
// when it is not false; an empty-string component still produces its
// delimiter.
514 if ($this->scheme !== false) {
515 $url .= $this->scheme . ':';
518 $authority = $this->getAuthority();
519 if ($authority !== false) {
520 $url .= '//' . $authority;
524 if ($this->query !== false) {
525 $url .= '?' . $this->query;
528 if ($this->fragment !== false) {
529 $url .= '#' . $this->fragment;
536 * Returns a normalized string representation of this URL. This is useful
537 * for comparison of URLs.
// Returns the string form of $url as produced by its getUrl() method.
// NOTE(review): the statements that create and normalize $url are not visible
// in this excerpt.
541 public function getNormalizedURL()
545 return $url->getUrl();
549 * Returns a normalized Net_URL2 instance.
/**
 * Normalizes this instance in place (RFC 3986, section 6): lower-cases scheme
 * and host, drops a port equal to the scheme's default, upper-cases
 * percent-encodings, removes dot segments and turns an empty path into "/"
 * when a host is present.
 *
 * Percent-encodings are upper-cased with preg_replace_callback(). The "e"
 * pattern modifier used previously no longer exists in PHP 7+, where
 * preg_replace() would return NULL and wipe the component.
 */
public function normalize()
{
    // See RFC 3986, section 6

    // Schemes are case-insensitive
    if ($this->scheme) {
        $this->scheme = strtolower($this->scheme);
    }

    // Hostnames are case-insensitive
    if ($this->host) {
        $this->host = strtolower($this->host);
    }

    // Remove default port number for known schemes (RFC 3986, section 6.2.3)
    if ($this->port &&
        $this->scheme &&
        $this->port == getservbyname($this->scheme, 'tcp')) {
        $this->port = false;
    }

    // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1)
    foreach (array('userinfo', 'host', 'path') as $part) {
        if ($this->$part) {
            $this->$part = preg_replace_callback(
                '/%[0-9a-f]{2}/i',
                function ($match) {
                    return strtoupper($match[0]);
                },
                $this->$part
            );
        }
    }

    // Path segment normalization (RFC 3986, section 6.2.2.3)
    $this->path = self::removeDotSegments($this->path);

    // Scheme based normalization (RFC 3986, section 6.2.3)
    if ($this->host && !$this->path) {
        $this->path = '/';
    }
}
592 * Returns whether this instance represents an absolute URL.
/**
 * Tells whether this instance represents an absolute URL, i.e. whether the
 * stored scheme is truthy.
 *
 * @return bool
 */
public function isAbsolute()
{
    return $this->scheme ? true : false;
}
602 * Returns a Net_URL2 instance representing an absolute URL relative to
605 * @param Net_URL2|string $reference relative URL
/**
 * Resolves a reference against this (absolute) base URL following the
 * algorithm of RFC 3986, section 5.2.2, and returns the target URL.
 *
 * Two defects of the previous implementation are fixed:
 *  - when the base has an authority and an empty path, the merged path is
 *    "/" followed by the reference path (RFC 3986, section 5.2.3); the
 *    reference path used to be dropped;
 *  - in non-strict mode the scheme is cleared on a copy, so a reference
 *    object passed in by the caller is no longer modified.
 *
 * @param Net_URL2|string $reference relative URL
 *
 * @return Net_URL2 the resolved URL
 *
 * @throws Exception if this base URL is not absolute
 */
public function resolve($reference)
{
    if (is_string($reference)) {
        $reference = new self($reference);
    }
    if (!$this->isAbsolute()) {
        throw new Exception('Base-URL must be absolute');
    }

    // A non-strict parser may ignore a scheme in the reference if it is
    // identical to the base URI's scheme.
    if (!$this->getOption(self::OPTION_STRICT) && $reference->scheme == $this->scheme) {
        $reference = clone $reference;
        $reference->scheme = false;
    }

    $target = new self('');
    if ($reference->scheme !== false) {
        // The reference is absolute itself: take it over as is.
        $target->scheme = $reference->scheme;
        $target->setAuthority($reference->getAuthority());
        $target->path = self::removeDotSegments($reference->path);
        $target->query = $reference->query;
    } else {
        $authority = $reference->getAuthority();
        if ($authority !== false) {
            // Network-path reference ("//host/path")
            $target->setAuthority($authority);
            $target->path = self::removeDotSegments($reference->path);
            $target->query = $reference->query;
        } else {
            if ($reference->path == '') {
                // Same document: keep the base path, and the base query
                // unless the reference brings its own.
                $target->path = $this->path;
                if ($reference->query !== false) {
                    $target->query = $reference->query;
                } else {
                    $target->query = $this->query;
                }
            } else {
                if (substr($reference->path, 0, 1) == '/') {
                    $target->path = self::removeDotSegments($reference->path);
                } else {
                    // Merge paths (RFC 3986, section 5.2.3)
                    if ($this->host !== false && $this->path == '') {
                        $target->path = '/' . $reference->path;
                    } else {
                        // Keep the base path up to and including its last "/"
                        $i = strrpos($this->path, '/');
                        if ($i !== false) {
                            $target->path = substr($this->path, 0, $i + 1);
                        }
                        $target->path .= $reference->path;
                    }
                    $target->path = self::removeDotSegments($target->path);
                }
                $target->query = $reference->query;
            }
            $target->setAuthority($this->getAuthority());
        }
        $target->scheme = $this->scheme;
    }

    $target->fragment = $reference->fragment;

    return $target;
}
673 * Removes dots as described in RFC 3986, section 5.2.4, e.g.
674 * "/foo/../bar/baz" => "/bar/baz"
676 * @param string $path a path
678 * @return string a path
/**
 * Removes dots as described in RFC 3986, section 5.2.4, e.g.
 * "/foo/../bar/baz" => "/bar/baz"
 *
 * The loop runs until the input is the empty string. The former truthiness
 * test dropped a remaining path of "0", and the former cap of 100 iterations
 * silently truncated paths with more segments. No cap is needed because every
 * branch shortens $path by at least one character.
 *
 * @param string $path a path
 *
 * @return string a path
 */
private static function removeDotSegments($path)
{
    $path   = (string) $path;
    $output = '';

    while ($path !== '') {
        // Step A: drop a leading "./" or "../"
        if (substr($path, 0, 2) === './') {
            $path = (string) substr($path, 2);
        } elseif (substr($path, 0, 3) === '../') {
            $path = (string) substr($path, 3);

        // Step B: "/./" or a final "/." collapses to "/"
        } elseif (substr($path, 0, 3) === '/./' || $path === '/.') {
            $path = '/' . substr($path, 3);

        // Step C: "/../" or a final "/.." collapses to "/" and removes the
        // last segment already written to the output
        } elseif (substr($path, 0, 4) === '/../' || $path === '/..') {
            $path   = '/' . substr($path, 4);
            $i      = strrpos($output, '/');
            $output = $i === false ? '' : (string) substr($output, 0, $i);

        // Step D: a lone "." or ".." is discarded
        } elseif ($path === '.' || $path === '..') {
            $path = '';

        // Step E: move the first segment (including its leading "/", if any)
        // to the output
        } else {
            $i = strpos($path, '/');
            if ($i === 0) {
                $i = strpos($path, '/', 1);
            }
            if ($i === false) {
                $i = strlen($path);
            }
            $output .= substr($path, 0, $i);
            $path    = (string) substr($path, $i);
        }
    }

    return $output;
}
726 * Returns a Net_URL2 instance representing the canonical URL of the
727 * currently executing PHP script.
/**
 * Returns a Net_URL2 instance representing the canonical URL of the
 * currently executing PHP script, built from PHP_SELF, SERVER_NAME and
 * SERVER_PORT.
 *
 * HTTPS is only assumed when $_SERVER['HTTPS'] is non-empty and not "off";
 * some servers set the variable to "off" for plain HTTP requests, which a
 * bare isset() check mistakes for HTTPS.
 *
 * @return Net_URL2
 *
 * @throws Exception if the script was not called through a webserver
 */
public static function getCanonical()
{
    if (!isset($_SERVER['REQUEST_METHOD'])) {
        // ALERT - no current URL
        throw new Exception('Script was not called through a webserver');
    }

    // Begin with a relative URL
    $url = new self($_SERVER['PHP_SELF']);

    $isHttps = !empty($_SERVER['HTTPS'])
        && strtolower($_SERVER['HTTPS']) !== 'off';
    $url->scheme = $isHttps ? 'https' : 'http';
    $url->host = $_SERVER['SERVER_NAME'];

    // Only a port that differs from the scheme's default is kept.
    $port = intval($_SERVER['SERVER_PORT']);
    if ($url->scheme == 'http' && $port != 80 ||
        $url->scheme == 'https' && $port != 443) {
        $url->port = $port;
    }

    return $url;
}
752 * Returns the URL used to retrieve the current request.
/**
 * Gets the URL used to retrieve the current request, as a string.
 *
 * @return string
 */
public static function getRequestedURL()
{
    $requested = self::getRequested();

    return $requested->getUrl();
}
762 * Returns a Net_URL2 instance representing the URL used to retrieve the
/**
 * Returns a Net_URL2 instance representing the URL used to retrieve the
 * current request, built from REQUEST_URI and HTTP_HOST.
 *
 * HTTPS is only assumed when $_SERVER['HTTPS'] is non-empty and not "off";
 * some servers set the variable to "off" for plain HTTP requests, which a
 * bare isset() check mistakes for HTTPS.
 *
 * @return Net_URL2
 *
 * @throws Exception if the script was not called through a webserver
 */
public static function getRequested()
{
    if (!isset($_SERVER['REQUEST_METHOD'])) {
        // ALERT - no current URL
        throw new Exception('Script was not called through a webserver');
    }

    // Begin with a relative URL
    $url = new self($_SERVER['REQUEST_URI']);

    $isHttps = !empty($_SERVER['HTTPS'])
        && strtolower($_SERVER['HTTPS']) !== 'off';
    $url->scheme = $isHttps ? 'https' : 'http';

    // Set host and possibly port
    $url->setAuthority($_SERVER['HTTP_HOST']);

    return $url;
}
783 * Sets the specified option.
785 * @param string $optionName a self::OPTION_ constant
786 * @param mixed $value option value
789 * @see self::OPTION_STRICT
790 * @see self::OPTION_USE_BRACKETS
791 * @see self::OPTION_ENCODE_KEYS
// Stores $value under $optionName in $this->options.
793 function setOption($optionName, $value)
// Option names that are not already keys of $this->options take this branch.
// NOTE(review): the statement executed in that case is not visible in this
// excerpt.
795 if (!array_key_exists($optionName, $this->options)) {
798 $this->options[$optionName] = $value;
802 * Returns the value of the specified option.
804 * @param string $optionName The name of the option to retrieve
/**
 * Looks up the value of an option.
 *
 * @param string $optionName The name of the option to retrieve
 *
 * @return mixed the stored value, or false when the option is not set (this
 *               includes options whose stored value is null)
 */
function getOption($optionName)
{
    if (!isset($this->options[$optionName])) {
        return false;
    }

    return $this->options[$optionName];
}