* @maintainer Brion Vibber <brion@status.net>
*/
-if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); }
+if (!defined('STATUSNET')) {
+ exit(1);
+}
class FeedSubBadURLException extends FeedSubException
{
{
}
+class FeedSubBadXmlException extends FeedSubException
+{
+}
+
+class FeedSubNoHubException extends FeedSubException
+{
+}
+
/**
* Given a web page or feed URL, discover the final location of the feed
* and return its current contents.
* if ($feed->discoverFromURL($url)) {
* print $feed->uri;
* print $feed->type;
- * processFeed($feed->body);
+ * processFeed($feed->feed); // DOMDocument
* }
*/
class FeedDiscovery
{
public $uri;
public $type;
- public $body;
+ public $feed;
+ public $root;
+
+ /** Post-initialize query helper... */
+ public function getLink($rel, $type=null)
+ {
+ // @fixme check for non-Atom links in RSS2 feeds as well
+ return self::getAtomLink($rel, $type);
+ }
+ public function getAtomLink($rel, $type=null)
+ {
+ return ActivityUtils::getLink($this->root, $rel, $type);
+ }
- public function feedMunger()
+ /**
+ * Get the referenced PuSH hub link from an Atom feed.
+ *
+ * @return mixed string or false
+ */
+ public function getHubLink()
{
- require_once 'XML/Feed/Parser.php';
- $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme
- return new FeedMunger($feed, $this->uri);
+ return $this->getAtomLink('hub');
}
/**
$client = new HTTPClient();
$response = $client->get($url);
} catch (HTTP_Request2_Exception $e) {
- throw new FeedSubBadURLException($e);
+ common_log(LOG_ERR, __METHOD__ . " Failure for $url - " . $e->getMessage());
+ throw new FeedSubBadURLException($e->getMessage());
}
if ($htmlOk) {
return $this->discoverFromURL($target, false);
}
}
-
+
return $this->initFromResponse($response);
}
-
+
+ function discoverFromFeedURL($url)
+ {
+ return $this->discoverFromURL($url, false);
+ }
+
function initFromResponse($response)
{
if (!$response->isOk()) {
- throw new FeedSubBadResponseException($response->getCode());
+ throw new FeedSubBadResponseException($response->getStatus());
}
$sourceurl = $response->getUrl();
$type = $response->getHeader('Content-Type');
if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) {
- $this->uri = $sourceurl;
- $this->type = $type;
- $this->body = $body;
- return true;
+ return $this->init($sourceurl, $type, $body);
} else {
common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl");
throw new FeedSubUnrecognizedTypeException($type);
}
}
+ function init($sourceurl, $type, $body)
+ {
+ $feed = new DOMDocument();
+ if ($feed->loadXML($body)) {
+ $this->uri = $sourceurl;
+ $this->type = $type;
+ $this->feed = $feed;
+
+ $el = $this->feed->documentElement;
+
+ // Looking for the "root" element: RSS channel or Atom feed
+
+ if ($el->tagName == 'rss') {
+ $channels = $el->getElementsByTagName('channel');
+ if ($channels->length > 0) {
+ $this->root = $channels->item(0);
+ } else {
+ throw new FeedSubBadXmlException($sourceurl);
+ }
+ } else if ($el->tagName == 'feed') {
+ $this->root = $el;
+ } else {
+ throw new FeedSubBadXmlException($sourceurl);
+ }
+
+ return $this->uri;
+ } else {
+ throw new FeedSubBadXmlException($sourceurl);
+ }
+ }
+
/**
* @param string $url source URL, used to resolve relative links
* @param string $body HTML body text
*/
function discoverFromHTML($url, $body)
{
- // DOMDocument::loadHTML may throw warnings on unrecognized elements.
- $old = error_reporting(error_reporting() & ~E_WARNING);
+ // DOMDocument::loadHTML may throw warnings on unrecognized elements,
+ // and notices on unrecognized namespaces.
+ $old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE));
$dom = new DOMDocument();
$ok = $dom->loadHTML($body);
error_reporting($old);
'application/atom+xml' => false,
'application/rss+xml' => false,
);
-
+
$nodes = $dom->getElementsByTagName('link');
for ($i = 0; $i < $nodes->length; $i++) {
$node = $nodes->item($i);
$type = $node->attributes->getNamedItem('type');
$href = $node->attributes->getNamedItem('href');
if ($rel && $type && $href) {
- $rel = trim($rel->value);
+ $rel = array_filter(explode(" ", $rel->value));
$type = trim($type->value);
$href = trim($href->value);
- if (trim($rel) == 'alternate' && array_key_exists($type, $feeds) && empty($feeds[$type])) {
+ if (in_array('alternate', $rel) && array_key_exists($type, $feeds) && empty($feeds[$type])) {
// Save the first feed found of each type...
$feeds[$type] = $this->resolveURI($href, $base);
}