9 * @author Greg Beaver <cellog@php.net>
10 * @copyright 1997-2009 The Authors
11 * @license http://opensource.org/licenses/bsd-license.php New BSD License
12 * @link http://pear.php.net/package/PEAR
13 * @since File available since Release 1.4.0a1
17 * For downloading xml files
19 require_once 'PEAR.php';
20 require_once 'PEAR/XMLParser.php';
21 require_once 'PEAR/Proxy.php';
24 * Intelligently retrieve data, following hyperlinks and redirects if necessary
28 * @author Greg Beaver <cellog@php.net>
29 * @copyright 1997-2009 The Authors
30 * @license http://opensource.org/licenses/bsd-license.php New BSD License
31 * @version Release: 1.10.5
32 * @link http://pear.php.net/package/PEAR
33 * @since Class available since Release 1.4.0a1
/**
 * Store the configuration object and the option array for later use.
 *
 * NOTE(review): in this listing every line carries its original line number
 * as a prefix, and some original lines (41 and 44 here, presumably the
 * braces) are not shown.
 *
 * @param object $config  configuration object; the methods below call get() on it
 * @param array  $options option flags; retrieveData() checks the 'offline' key
 */
40 function __construct(&$config, $options = array())
// Assigned by reference, so $this->config aliases the caller's variable.
42 $this->config = &$config;
43 $this->_options = $options;
47 * Retrieve REST data, but always retrieve the local cache if it is available.
49 * This is useful for elements that should never change, such as information on a particular
51 * @param string full URL to this resource
52 * @param array|false values for the Accept request header (not Accept-Encoding), or false
53 * @param boolean if true, xml will be returned as a string, otherwise, xml will be
54 * parsed using PEAR_XMLParser
55 * @return string|array
// Cache-first lookup: if a cache file exists for $url it is returned as-is,
// with no age/TTL check; otherwise the call is delegated to retrieveData().
57 function retrieveCacheFirst($url, $accept = false, $forcestring = false, $channel = false)
// Cache files live directly in cache_dir and are named md5($url) . 'rest.cachefile'.
59 $cachefile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
60 md5($url) . 'rest.cachefile';
// NOTE(review): unserialize() is applied to whatever the cache file contains;
// that is only safe if cache_dir cannot be written by untrusted users - confirm.
62 if (file_exists($cachefile)) {
63 return unserialize(implode('', file($cachefile)));
// No cached copy on disk: take the regular path (TTL check, download, cache update).
66 return $this->retrieveData($url, $accept, $forcestring, $channel);
70 * Retrieve a remote REST resource
71 * @param string full URL to this resource
72 * @param array|false values for the Accept request header (not Accept-Encoding), or false
73 * @param boolean if true, xml will be returned as a string, otherwise, xml will be
74 * parsed using PEAR_XMLParser
75 * @return string|array
// Fetch a REST resource: use the local cache while it is fresh, otherwise
// download it (unless the 'offline' option is set), parse XML responses with
// PEAR_XMLParser, and store the result in the cache.
// NOTE(review): several original lines inside this method are not shown in
// this listing (see the gaps in the line-number prefixes); comments marked
// "presumably" describe code that is not visible here.
77 function retrieveData($url, $accept = false, $forcestring = false, $channel = false)
// Load the stored cache id record for this URL.
79 $cacheId = $this->getCacheId($url);
// useLocalCache() yields the cached content while it is younger than cache_ttl.
// Original line 81 is not shown; presumably it returns $ret.
80 if ($ret = $this->useLocalCache($url, $cacheId)) {
84 $file = $trieddownload = false;
// A download is only attempted when the 'offline' option is absent.
85 if (!isset($this->_options['offline'])) {
86 $trieddownload = true;
// The stored 'lastChange' value is handed to downloadHttp(), which turns it into
// If-Modified-Since / If-None-Match request headers; false is passed when no
// cache id is available.
87 $file = $this->downloadHttp($url, $cacheId ? $cacheId['lastChange'] : false, $accept, $channel);
90 if (PEAR::isError($file)) {
// NOTE(review): -9276 is a magic error code; the comment two lines below
// suggests it marks a socket connect failure - confirm where it is raised.
// Original lines 92-93 are not shown; presumably any other error is returned.
91 if ($file->getCode() !== -9276) {
95 $trieddownload = false;
96 $file = false; // use local copy if available on socket connect error
// Fall back to the cached copy (original line 99, presumably the guard
// for "nothing downloaded", is not shown).
100 $ret = $this->getCache($url);
101 if (!PEAR::isError($ret) && $trieddownload) {
102 // reset the age of the cache if the server says it was unmodified
// $nochange = true: saveCache() keeps the stored 'lastChange' value.
103 $result = $this->saveCache($url, $ret, null, true, $cacheId);
104 if (PEAR::isError($result)) {
105 return PEAR::raiseError($result->getMessage());
// downloadHttp() can return array($data, $lastmodified, $headers); index 1 is
// the validator info. The lines assigning $headers and $content are not shown
// in this listing.
112 if (is_array($file)) {
114 $lastmodified = $file[1];
118 $lastmodified = false;
// This save stores $content before any XML parsing. NOTE(review): presumably
// guarded by $forcestring (original lines 121-122 are not shown) - confirm.
123 $result = $this->saveCache($url, $content, $lastmodified, false, $cacheId);
124 if (PEAR::isError($result)) {
125 return PEAR::raiseError($result->getMessage());
131 if (isset($headers['content-type'])) {
// Keep only the part before the first ';' so parameters such as a charset
// do not take part in the comparison.
132 $content_type = explode(";", $headers['content-type']);
133 $content_type = $content_type[0];
134 switch ($content_type) {
136 case 'application/xml' :
// A text/plain body is only inspected further when it starts with an XML
// declaration.
138 if ($content_type === 'text/plain') {
139 $check = substr($content, 0, 5);
140 if ($check !== '<?xml') {
145 $parser = new PEAR_XMLParser;
// Switch PEAR error handling to return mode around parse() so that a parse
// failure comes back as a value that can be checked below.
146 PEAR::pushErrorHandling(PEAR_ERROR_RETURN);
147 $err = $parser->parse($content);
148 PEAR::popErrorHandling();
149 if (PEAR::isError($err)) {
150 return PEAR::raiseError('Invalid xml downloaded from "' . $url . '": ' .
153 $content = $parser->getData();
156 // use it as a string
// Presumably the branch for a response without a content-type header (original
// lines 157-159 are not shown).
// NOTE(review): unlike the branch above, the result of parse() is not checked here.
160 $parser = new PEAR_XMLParser;
161 $parser->parse($content);
162 $content = $parser->getData();
// Store the (possibly parsed) content together with the validator info.
165 $result = $this->saveCache($url, $content, $lastmodified, false, $cacheId);
166 if (PEAR::isError($result)) {
167 return PEAR::raiseError($result->getMessage());
/**
 * Return the cached content for $url while the cache is younger than the
 * configured cache_ttl.
 *
 * NOTE(review): the return statements for the "no cache id file" and
 * "cache too old" cases are not shown in this listing (original lines
 * 179 and 191); retrieveData() tests the result for truthiness.
 *
 * @param string     $url     full URL of the resource
 * @param array|null $cacheid cache id record; when null it is read from disk
 */
173 function useLocalCache($url, $cacheid = null)
// No record supplied: read it from md5($url) . 'rest.cacheid' in cache_dir.
175 if ($cacheid === null) {
176 $cacheidfile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
177 md5($url) . 'rest.cacheid';
178 if (!file_exists($cacheidfile)) {
182 $cacheid = unserialize(implode('', file($cacheidfile)));
185 $cachettl = $this->config->get('cache_ttl');
186 // If cache is newer than $cachettl seconds, we use the cache!
// 'age' is compared against time(), so it is treated as a Unix timestamp.
187 if (time() - $cacheid['age'] < $cachettl) {
188 return $this->getCache($url);
/**
 * Read the cache id record stored for $url.
 *
 * The record lives in cache_dir as md5($url) . 'rest.cacheid' and is stored
 * in serialize() format; other methods read its 'age' and 'lastChange' keys.
 *
 * NOTE(review): the return statements (original lines 200 and 204) are not
 * shown in this listing; presumably the unserialized record is returned, and
 * a falsy value when the file is missing - confirm.
 *
 * @param string $url full URL of the resource
 */
194 function getCacheId($url)
196 $cacheidfile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
197 md5($url) . 'rest.cacheid';
199 if (!file_exists($cacheidfile)) {
203 $ret = unserialize(implode('', file($cacheidfile)));
/**
 * Return the cached content stored for $url.
 *
 * @param string $url full URL of the resource
 * @return mixed the unserialized cache content, or a PEAR error (created with
 *               PEAR::raiseError) when no cache file exists
 */
207 function getCache($url)
// Same naming scheme as the id file, with the 'rest.cachefile' suffix.
209 $cachefile = $this->config->get('cache_dir') . DIRECTORY_SEPARATOR .
210 md5($url) . 'rest.cachefile';
212 if (!file_exists($cachefile)) {
213 return PEAR::raiseError('No cached content available for "' . $url . '"');
// NOTE(review): unserialize() on file contents - only safe if cache_dir is
// not writable by untrusted users; confirm.
216 return unserialize(implode('', file($cachefile)));
220 * @param string full URL to REST resource
221 * @param string original contents of the REST resource
222 * @param array HTTP Last-Modified and ETag headers
223 * @param bool if true, then the cache id file should be regenerated to
224 * trigger a new time-to-live value
// Write the cache id record and, unless only the age is being refreshed, the
// content file for $url. Both files are named md5($url) plus a suffix and are
// written through saveCacheFile().
// NOTE(review): several original lines of this method are not shown in this
// listing, including the return statements and one entry of the id array.
226 function saveCache($url, $contents, $lastmodified, $nochange = false, $cacheid = null)
228 $cache_dir = $this->config->get('cache_dir');
229 $d = $cache_dir . DIRECTORY_SEPARATOR . md5($url);
230 $cacheidfile = $d . 'rest.cacheid';
231 $cachefile = $d . 'rest.cachefile';
// Create cache_dir on demand ('-p' is passed to System::mkdir).
// NOTE(review): System is not among the require_once lines visible at the top
// of this file - confirm it is loaded elsewhere.
233 if (!is_dir($cache_dir)) {
234 if (System::mkdir(array('-p', $cache_dir)) === false) {
235 return PEAR::raiseError("The value of config option cache_dir ($cache_dir) is not a directory and attempts to create the directory failed.");
239 if (!is_writeable($cache_dir)) {
240 // If writing to the cache dir is not going to work, silently do nothing.
241 // An ugly hack, but retains compat with PEAR 1.9.1 where many commands
242 // work fine as non-root user (w/out write access to default cache dir).
// When only the age is refreshed and no record was passed in, reload the
// stored record so its 'lastChange' value can be carried over.
246 if ($cacheid === null && $nochange) {
247 $cacheid = unserialize(implode('', file($cacheidfile)));
// Original line 251 is not shown; presumably it sets the 'age' key that
// useLocalCache() reads.
250 $idData = serialize(array(
// $nochange keeps the previous validator; otherwise the new one is stored.
252 'lastChange' => ($nochange ? $cacheid['lastChange'] : $lastmodified),
255 $result = $this->saveCacheFile($cacheidfile, $idData);
256 if (PEAR::isError($result)) {
// Presumably returns here without rewriting the content file (original
// lines 259-260 are not shown).
258 } elseif ($nochange) {
262 $result = $this->saveCacheFile($cachefile, serialize($contents));
// If the content could not be written, drop the id file so the two files
// do not disagree.
263 if (PEAR::isError($result)) {
264 if (file_exists($cacheidfile)) {
265 @unlink($cacheidfile);
/**
 * Write $contents to $file, refusing to write through a symlink.
 *
 * A new file is created exclusively; an existing file is opened without
 * truncation so it can be checked before anything is overwritten.
 *
 * NOTE(review): some original lines are not shown in this listing (e.g. the
 * condition choosing between the is_link() check and the lstat()/fstat()
 * comparison, and the success return value).
 *
 * @param string $file     path of the cache file to write
 * @param string $contents data to write
 * @return mixed a PEAR error (created with PEAR::raiseError) on failure
 */
274 function saveCacheFile($file, $contents)
276 $len = strlen($contents);
// 'x' mode fails if the path already exists, so a non-false handle here
// means the file was newly created by this call.
278 $cachefile_fp = @fopen($file, 'xb'); // x is the O_CREAT|O_EXCL mode
279 if ($cachefile_fp !== false) { // create file
280 if (fwrite($cachefile_fp, $contents, $len) < $len) {
281 fclose($cachefile_fp);
282 return PEAR::raiseError("Could not write $file.");
284 } else { // update file
285 $cachefile_fp = @fopen($file, 'r+b'); // do not truncate file
286 if (!$cachefile_fp) {
287 return PEAR::raiseError("Could not open $file for writing.");
291 $not_symlink = !is_link($file); // see bug #18834
// Compare the path's own stat data (lstat) with that of the opened handle
// (fstat): mode, inode and device must all match and the link count must be
// exactly 1 for the file to be accepted.
293 $cachefile_lstat = lstat($file);
294 $cachefile_fstat = fstat($cachefile_fp);
295 $not_symlink = $cachefile_lstat['mode'] == $cachefile_fstat['mode']
296 && $cachefile_lstat['ino'] == $cachefile_fstat['ino']
297 && $cachefile_lstat['dev'] == $cachefile_fstat['dev']
298 && $cachefile_fstat['nlink'] === 1;
// Truncation is deferred until after the check above.
302 ftruncate($cachefile_fp, 0); // NOW truncate
303 if (fwrite($cachefile_fp, $contents, $len) < $len) {
304 fclose($cachefile_fp);
305 return PEAR::raiseError("Could not write $file.");
// Presumably the branch taken when the check failed (original line 307 is
// not shown): close the handle and report the link target.
308 fclose($cachefile_fp);
309 $link = function_exists('readlink') ? readlink($file) : $file;
310 return PEAR::raiseError('SECURITY ERROR: Will not write to ' . $file . ' as it is symlinked to ' . $link . ' - Possible symlink attack');
314 fclose($cachefile_fp);
319 * Efficiently Download a file through HTTP. Returns downloaded file as a string in-memory
320 * This is best used for small files
322 * If an HTTP proxy has been configured (http_proxy PEAR_Config
323 * setting), the proxy will be used.
325 * @param string $url the URL to download
326 * @param false|string|array $lastmodified header values to check against for caching
327 * use false to return the header values from this download
328 * @param false|array $accept Accept headers to send
329 * @param false|string $channel channel used when looking up the 'username' and 'password' config values
330 * @return string|array Returns the contents of the downloaded file or a PEAR
331 * error on failure. If the error is caused by
332 * socket-related errors, the error object will
333 * have the fsockopen error code available through
334 * getCode(). If caching is requested, returns array(data, validator info, headers)
// Perform one HTTP/1.1 GET for $url over a socket obtained from PEAR_Proxy,
// follow redirects via recursion, and return the body (with validator info
// and headers when caching is in use).
// NOTE(review): many original lines of this method are not shown in this
// listing (see the gaps in the line-number prefixes); comments marked
// "presumably" describe code that is not visible here.
339 function downloadHttp($url, $lastmodified = null, $accept = false, $channel = false)
// Redirect depth is tracked in a static, i.e. shared by all calls.
341 static $redirect = 0;
342 // always reset, so we are clean in case of error
343 $wasredirect = $redirect;
346 $info = parse_url($url);
347 if (!isset($info['scheme']) || !in_array($info['scheme'], array('http', 'https'))) {
348 return PEAR::raiseError('Cannot download non-http URL "' . $url . '"');
351 if (!isset($info['host'])) {
352 return PEAR::raiseError('Cannot download from non-URL "' . $url . '"');
// The isset() guards for host and scheme repeat the checks above and can only
// take their first branch here.
355 $host = isset($info['host']) ? $info['host'] : null;
356 $port = isset($info['port']) ? $info['port'] : null;
// NOTE(review): only the 'path' component is kept. A query string in $url is
// not part of $path, and $path is null when the URL has no path, which makes
// the non-proxy request line below "GET  HTTP/1.1" - confirm this is intended.
357 $path = isset($info['path']) ? $info['path'] : null;
358 $schema = (isset($info['scheme']) && $info['scheme'] == 'https') ? 'https' : 'http';
360 $proxy = new PEAR_Proxy($this->config);
// Default port by scheme; presumably only applied when the URL gave no port
// (original lines 361-362 are not shown).
363 $port = (isset($info['scheme']) && $info['scheme'] == 'https') ? 443 : 80;
// Through a proxy, plain-http requests use the absolute URL in the request
// line; otherwise only the path is sent.
366 if ($proxy->isProxyConfigured() && $schema === 'http') {
367 $request = "GET $url HTTP/1.1\r\n";
369 $request = "GET $path HTTP/1.1\r\n";
372 $request .= "Host: $host\r\n";
// Build conditional-request headers from $lastmodified: an array may carry
// 'Last-Modified' and/or 'ETag'; a non-empty scalar is sent as
// If-Modified-Since.
373 $ifmodifiedsince = '';
374 if (is_array($lastmodified)) {
375 if (isset($lastmodified['Last-Modified'])) {
376 $ifmodifiedsince = 'If-Modified-Since: ' . $lastmodified['Last-Modified'] . "\r\n";
379 if (isset($lastmodified['ETag'])) {
380 $ifmodifiedsince .= "If-None-Match: $lastmodified[ETag]\r\n";
383 $ifmodifiedsince = ($lastmodified ? "If-Modified-Since: $lastmodified\r\n" : '');
386 $request .= $ifmodifiedsince .
387 "User-Agent: PEAR/1.10.5/PHP/" . PHP_VERSION . "\r\n";
// Channel-specific credentials are sent as HTTP Basic auth when both are set.
// NOTE(review): no scheme or host check is visible here, and the redirect
// call below passes the same $channel, so the Authorization header appears
// to be sent over plain http and to redirect targets as well - confirm.
389 $username = $this->config->get('username', null, $channel);
390 $password = $this->config->get('password', null, $channel);
392 if ($username && $password) {
393 $tmp = base64_encode("$username:$password");
394 $request .= "Authorization: Basic $tmp\r\n";
// Proxy credentials; the rest of this statement (original line 400) is not
// shown in this listing.
397 $proxyAuth = $proxy->getProxyAuth();
399 $request .= 'Proxy-Authorization: Basic ' .
404 $request .= 'Accept: ' . implode(', ', $accept) . "\r\n";
// An Accept-Encoding header with an empty value is sent.
407 $request .= "Accept-Encoding:\r\n";
408 $request .= "Connection: close\r\n";
411 $secure = ($schema == 'https');
412 $fp = $proxy->openSocket($host, $port, $secure);
// Presumably the error is returned to the caller (original lines 414-415 are
// not shown).
413 if (PEAR::isError($fp)) {
417 fwrite($fp, $request);
// Read response header lines until trim() yields an empty (falsy) string.
421 while ($line = trim(fgets($fp, 1024))) {
// Header names are stored lower-cased. NOTE(review): the pattern requires at
// least one whitespace character after the colon, so "Name:value" lines are
// not captured.
422 if (preg_match('/^([^:]+):\s+(.*)\s*\\z/', $line, $matches)) {
423 $headers[strtolower($matches[1])] = trim($matches[2]);
// Status line: the three-digit code is captured.
424 } elseif (preg_match('|^HTTP/1.[01] ([0-9]{3}) |', $line, $matches)) {
425 $reply = (int)$matches[1];
// 304 while caching is in use; the body of this branch (original lines
// 427-428) is not shown - presumably it returns false so the caller uses
// its cached copy.
426 if ($reply == 304 && ($lastmodified || ($lastmodified === false))) {
// Anything other than 200 or one of the listed redirect codes is an error.
430 if (!in_array($reply, array(200, 301, 302, 303, 305, 307))) {
431 return PEAR::raiseError("File $schema://$host:$port$path not valid (received: $line)");
// Redirect handling (presumably guarded by a "$reply is not 200" test on
// original lines 435-436, which are not shown).
437 if (!isset($headers['location'])) {
438 return PEAR::raiseError("File $schema://$host:$port$path not valid (redirected but no location)");
// Give up once the depth counter is above 4.
441 if ($wasredirect > 4) {
442 return PEAR::raiseError("File $schema://$host:$port$path not valid (redirection looped more than 5 times)");
445 $redirect = $wasredirect + 1;
446 return $this->downloadHttp($headers['location'], $lastmodified, $accept, $channel);
// NOTE(review): $length is not referenced in any line visible in this
// listing - possibly unused.
449 $length = isset($headers['content-length']) ? $headers['content-length'] : -1;
// Read the body in 8192-byte chunks; the loop body that accumulates $data is
// not shown in this listing.
452 while ($chunk = @fread($fp, 8192)) {
// Caching in use ($lastmodified is false or truthy): collect the validators
// from the response. An ETag produces an array, to which Last-Modified is
// added; without an ETag, Last-Modified is returned as a plain string.
457 if ($lastmodified === false || $lastmodified) {
458 if (isset($headers['etag'])) {
459 $lastmodified = array('ETag' => $headers['etag']);
462 if (isset($headers['last-modified'])) {
463 if (is_array($lastmodified)) {
464 $lastmodified['Last-Modified'] = $headers['last-modified'];
466 $lastmodified = $headers['last-modified'];
// Result triple read by retrieveData(): body, validator info, headers.
470 return array($data, $lastmodified, $headers);