3 * StatusNet - the distributed open-source microblogging tool
4 * Copyright (C) 2009, StatusNet, Inc.
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * @package FeedSubPlugin
22 * @maintainer Brion Vibber <brion@status.net>
// Stop with exit status 1 unless one of the STATUSNET / LACONICA constants is defined.
// NOTE(review): presumably this prevents the file from being run outside the application bootstrap — confirm.
25 if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); }
// Thrown by discoverFromURL() when the HTTP client raises HTTP_Request2_Exception;
// the caught exception is passed to the constructor.
27 class FeedSubBadURLException extends FeedSubException
// Thrown by initFromResponse() with the response code when $response->isOk() is false.
31 class FeedSubBadResponseException extends FeedSubException
// Thrown by initFromResponse() with the source URL.
// NOTE(review): the guarding condition is not visible in this listing — presumably an empty response body; confirm.
35 class FeedSubEmptyException extends FeedSubException
// Thrown by discoverFromHTML(), where it is spelled FeedSubBadHtmlException
// (PHP resolves class names case-insensitively, so both spellings name this class).
// NOTE(review): the guarding condition is not visible in this listing — presumably a failed DOMDocument::loadHTML(); confirm.
39 class FeedSubBadHTMLException extends FeedSubException
// Thrown by initFromResponse() with the Content-Type value, right after an
// "Unrecognized feed type" warning is logged.
43 class FeedSubUnrecognizedTypeException extends FeedSubException
// Thrown by discoverFromURL() with the requested URL.
// NOTE(review): the guarding condition is not visible in this listing — presumably discoverFromHTML() found no feed link; confirm.
47 class FeedSubNoFeedException extends FeedSubException
// Builds a FeedMunger for the feed held on this object.
// Parses $this->body with XML_Feed_Parser and pairs the parsed feed with $this->uri.
// @return FeedMunger
58 public function feedMunger()
// Loaded on demand rather than at file level.
60 require_once 'XML/Feed/Parser.php';
// NOTE(review): the meaning of the (false, false, true) flags is not shown here and the
// line already carries a @fixme — check them against the XML_Feed_Parser constructor docs.
61 $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme
62 return new FeedMunger($feed, $this->uri);
68 * @return string with validated URL
69 * @throws FeedSubBadURLException
70 * @throws FeedSubBadHTMLException
71 * @throws FeedSubNoFeedException
72 * @throws FeedSubEmptyException
73 * @throws FeedSubUnrecognizedTypeException
// Fetches $url over HTTP and hands the response to initFromResponse().
// If the response Content-Type is HTML/XHTML, the page is first scanned with
// discoverFromHTML() for a feed link and the lookup is retried on that target.
// NOTE(review): several short lines (the try opener and the if guards) are missing from
// this listing, so exactly how $htmlOk and $isHtml gate the HTML branch is not visible — confirm.
// NOTE(review): the docblock promises a string return, but every visible return path
// ends in initFromResponse(); confirm what callers actually receive.
75 function discoverFromURL($url, $htmlOk=true)
78 $client = new HTTPClient();
79 $response = $client->get($url);
80 } catch (HTTP_Request2_Exception $e) {
// Request failures are re-thrown as a bad-URL error carrying the original exception.
81 throw new FeedSubBadURLException($e);
85 $type = $response->getHeader('Content-Type');
// Anchored at the start only, so a trailing "; charset=..." parameter still matches; case-insensitive.
86 $isHtml = preg_match('!^(text/html|application/xhtml\+xml)!i', $type);
// The response's own URL is passed as the base for resolving relative links.
88 $target = $this->discoverFromHTML($response->getUrl(), $response->getBody());
90 throw new FeedSubNoFeedException($url);
// Retry on the discovered target, this time passing $htmlOk=false.
92 return $this->discoverFromURL($target, false);
96 return $this->initFromResponse($response);
// Validates an HTTP response and records the feed's URL on this object.
// $response must provide isOk(), getCode(), getUrl(), getBody() and getHeader().
// Throws FeedSubBadResponseException when $response->isOk() is false.
// Throws FeedSubEmptyException with the source URL
//   (NOTE(review): its guard is not visible in this listing — presumably an empty body; confirm).
// Throws FeedSubUnrecognizedTypeException with the Content-Type after logging a warning.
// NOTE(review): the lines between the $this->uri assignment and the warning log are missing
// from this listing, so the success-path return value cannot be stated from here — confirm.
99 function initFromResponse($response)
101 if (!$response->isOk()) {
102 throw new FeedSubBadResponseException($response->getCode());
105 $sourceurl = $response->getUrl();
106 $body = $response->getBody();
108 throw new FeedSubEmptyException($sourceurl);
111 $type = $response->getHeader('Content-Type');
// Accepts text/xml, application/xml, application/rss+xml and application/atom+xml;
// anchored at the start only and case-insensitive, so header parameters are tolerated.
112 if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) {
113 $this->uri = $sourceurl;
118 common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl");
119 throw new FeedSubUnrecognizedTypeException($type);
124 * @param string $url source URL, used to resolve relative links
125 * @param string $body HTML body text
126 * @return mixed string with URL or false if no target found
// Parses an HTML page and looks for a feed autodiscovery <link>.
// A link qualifies when it has rel, type and href attributes, rel is exactly
// "alternate", and type is one of the listed feed MIME types; the first match
// is returned with its href resolved against the page's base URL.
// NOTE(review): several short lines (guards, the $feedTypes array opener, the fall-through
// return) are missing from this listing; notes below mark what cannot be seen.
128 function discoverFromHTML($url, $body)
130 // DOMDocument::loadHTML may throw warnings on unrecognized elements.
// Mask E_WARNING only for the duration of the parse; the previous level is restored below.
131 $old = error_reporting(error_reporting() & ~E_WARNING);
132 $dom = new DOMDocument();
133 $ok = $dom->loadHTML($body);
134 error_reporting($old);
// NOTE(review): the guard for this throw is not visible — presumably a falsy $ok; confirm.
137 throw new FeedSubBadHtmlException();
140 // Autodiscovery links may be relative to the page's URL or <base href>
142 $nodes = $dom->getElementsByTagName('base');
143 for ($i = 0; $i < $nodes->length; $i++) {
144 $node = $nodes->item($i);
145 if ($node->hasAttributes()) {
146 $href = $node->attributes->getNamedItem('href');
// No break is visible in this loop, so apparently a later <base href> overwrites an earlier one.
148 $base = trim($href->value);
// The <base href> value is itself resolved against the page URL.
// NOTE(review): the fallback used when no <base href> exists is not visible here — confirm.
153 $base = $this->resolveURI($base, $url);
158 // Ok... now on to the links!
159 // @fixme merge with the munger link checks
160 $nodes = $dom->getElementsByTagName('link');
161 for ($i = 0; $i < $nodes->length; $i++) {
162 $node = $nodes->item($i);
163 if ($node->hasAttributes()) {
164 $rel = $node->attributes->getNamedItem('rel');
165 $type = $node->attributes->getNamedItem('type');
166 $href = $node->attributes->getNamedItem('href');
// All three attributes must be present; from here on the variables hold trimmed strings, not attribute nodes.
167 if ($rel && $type && $href) {
168 $rel = trim($rel->value);
169 $type = trim($type->value);
170 $href = trim($href->value);
173 'application/rss+xml',
174 'application/atom+xml',
// Exact, case-sensitive comparisons: a multi-token rel such as "alternate feed" does not match.
// $rel was already trimmed above, so the second trim() has no further effect.
176 if (trim($rel) == 'alternate' && in_array($type, $feedTypes)) {
177 return $this->resolveURI($href, $base);
187 * Resolve a possibly relative URL against some absolute base URL
188 * @param string $rel relative or absolute URL
189 * @param string $base absolute URL
190 * @return string absolute URL, or the original URL if it could not be resolved.
192 function resolveURI($rel, $base)
194 require_once "Net/URL2.php";
196 $relUrl = new Net_URL2($rel);
197 if ($relUrl->isAbsolute()) {
200 $baseUrl = new Net_URL2($base);
201 $absUrl = $baseUrl->resolve($relUrl);
202 return $absUrl->getURL();
203 } catch (Exception $e) {
204 common_log(LOG_WARNING, 'Unable to resolve relative link "' .
205 $rel . '" against base "' . $base . '": ' . $e->getMessage());