/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{

    /**
     * Individual URI components. A component that is absent from the URI
     * is represented by null; the path is a string (possibly empty).
     */
    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * Builds a URI from its already-split components.
     *
     * @note Automatically normalizes scheme and port
     * @param $scheme   Scheme name or null; lower-cased when it is not already
     * @param $userinfo Userinfo component or null
     * @param $host     Host component or null
     * @param $port     Port (numeric string or int) or null; cast to int
     * @param $path     Path component
     * @param $query    Query component or null
     * @param $fragment Fragment component or null
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        // Skip strtolower() when the scheme is absent or already lowercase.
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Scheme object appropriate for validating this URI, or false
     *         when no scheme object could be obtained from the registry
     */
    public function getSchemeObj($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context) {

        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        // gen-delims is listed for reference only; no encoder below uses it.
        $chars_gen_delims = ':/?#[]@';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate scheme (MUST BE FIRST!)
        // A host-less URI that spells out the default scheme has the scheme
        // dropped, turning it into a plain relative reference.
        if (!is_null($this->scheme) && is_null($this->host)) {
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate host: an invalid host is discarded rather than failing
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;
        }

        // validate userinfo: sub-delims and ':' are preserved by the encoder
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is dropped
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // validate path: which ABNF production applies depends on whether
        // an authority (host) and/or a scheme is present
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) {
            // path-abempty (hier and relative)
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '' && $this->path[0] === '/') {
            // path-absolute (hier and relative)
            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                // This shouldn't ever happen! Without a host, a leading
                // "//" would be read back as an authority, so drop the path.
                $this->path = '';
            } else {
                $this->path = $segments_encoder->encode($this->path);
            }
        } elseif (!is_null($this->scheme) && $this->path !== '') {
            // path-rootless (hier)
            // Short circuit evaluation means we don't need to check nz
            $this->path = $segments_encoder->encode($this->path);
        } elseif (is_null($this->scheme) && $this->path !== '') {
            // path-noscheme (relative)
            // (once again, not checking nz)
            // The first segment must not keep a ':' (it would look like a
            // scheme), so it gets an encoder that does not preserve ':'.
            $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
            $c = strpos($this->path, '/');
            if ($c !== false) {
                $this->path =
                    $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                    $segments_encoder->encode(substr($this->path, $c));
            } else {
                $this->path = $segment_nc_encoder->encode($this->path);
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;

    }

    /**
     * Convert URI back to string
     * @return String URI appropriate for output
     */
    public function toString() {
        // reconstruct authority; it only exists when a host is present
        $authority = null;
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if (!is_null($this->port)) $authority .= ':' . $this->port;
        }

        // reconstruct the result
        $result = '';
        if (!is_null($this->scheme)) $result .= $this->scheme . ':';
        if (!is_null($authority)) $result .= '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query)) $result .= '?' . $this->query;
        if (!is_null($this->fragment)) $result .= '#' . $this->fragment;

        return $result;
    }

}
// vim: et sw=4 sts=4