git.mxchange.org Git - quix0rs-gnu-social.git/blobdiff - plugins/OStatus/lib/feeddiscovery.php
Merge branch 'master' into testing
[quix0rs-gnu-social.git] / plugins / OStatus / lib / feeddiscovery.php
index 9bc7892fb2db0e641e7f75b4af45fb8e9424bcce..4ac243832617b89b00737b3e497f9ed27dc662a4 100644 (file)
@@ -48,6 +48,14 @@ class FeedSubNoFeedException extends FeedSubException
 {
 }
 
+/**
+ * Raised by FeedDiscovery::init() when the fetched body cannot be parsed as
+ * XML, or when it parses but is neither an <rss> document containing a
+ * <channel> nor a <feed> document. The source URL is passed as the message.
+ */
+class FeedSubBadXmlException extends FeedSubException
+{
+}
+
+/**
+ * NOTE(review): not thrown anywhere in the visible part of this file;
+ * presumably signals that a feed advertises no hub -- confirm at the
+ * throw sites.
+ */
+class FeedSubNoHubException extends FeedSubException
+{
+}
+
 /**
  * Given a web page or feed URL, discover the final location of the feed
  * and return its current contents.
@@ -57,21 +65,26 @@ class FeedSubNoFeedException extends FeedSubException
  *   if ($feed->discoverFromURL($url)) {
  *     print $feed->uri;
  *     print $feed->type;
- *     processFeed($feed->body);
+ *     processFeed($feed->feed); // DOMDocument
  *   }
  */
 class FeedDiscovery
 {
     public $uri;
     public $type;
-    public $body;
+    public $feed;
+    public $root;
 
+    /**
+     * Look up a link on the discovered feed by relation and, optionally, type.
+     *
+     * Post-initialization query helper: it delegates to getAtomLink(), which
+     * reads $this->root, so it is only meaningful once a feed has been loaded.
+     *
+     * @param string $rel  link relation to look for
+     * @param string $type optional type, passed through unchanged (default null)
+     *
+     * @return mixed whatever getAtomLink() returns
+     */
+    public function getLink($rel, $type=null)
+    {
+        // @fixme check for non-Atom links in RSS2 feeds as well
+        // Dispatch through $this rather than self:: -- getAtomLink() is an
+        // instance method, and this way subclass overrides are honoured.
+        return $this->getAtomLink($rel, $type);
+    }
 
+    /**
+     * Look up a link on the feed's root element ($this->root) by delegating
+     * to ActivityUtils::getLink().
+     *
+     * @param string $rel  link relation, passed through unchanged
+     * @param string $type optional type, passed through unchanged (default null)
+     *
+     * @return mixed whatever ActivityUtils::getLink() returns
+     */
-    public function feedMunger()
+    public function getAtomLink($rel, $type=null)
     {
-        require_once 'XML/Feed/Parser.php';
-        $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme
-        return new FeedMunger($feed, $this->uri);
+        return ActivityUtils::getLink($this->root, $rel, $type);
     }
 
     /**
@@ -90,7 +103,8 @@ class FeedDiscovery
             $client = new HTTPClient();
             $response = $client->get($url);
         } catch (HTTP_Request2_Exception $e) {
-            throw new FeedSubBadURLException($e);
+            common_log(LOG_ERR, __METHOD__ . " Failure for $url - " . $e->getMessage());
+            throw new FeedSubBadURLException($e->getMessage());
         }
 
         if ($htmlOk) {
@@ -104,14 +118,19 @@ class FeedDiscovery
                 return $this->discoverFromURL($target, false);
             }
         }
-        
+
         return $this->initFromResponse($response);
     }
-    
+
+    /**
+     * Run discovery on $url by delegating to discoverFromURL() with its
+     * second argument set to false.
+     *
+     * @param string $url URL to fetch
+     *
+     * @return mixed whatever discoverFromURL() returns
+     */
+    function discoverFromFeedURL($url)
+    {
+        // Presumably the $htmlOk switch of discoverFromURL() -- confirm
+        // against its signature.
+        $htmlOk = false;
+
+        return $this->discoverFromURL($url, $htmlOk);
+    }
+
     function initFromResponse($response)
     {
         if (!$response->isOk()) {
-            throw new FeedSubBadResponseException($response->getCode());
+            throw new FeedSubBadResponseException($response->getStatus());
         }
 
         $sourceurl = $response->getUrl();
@@ -122,16 +141,44 @@ class FeedDiscovery
 
         $type = $response->getHeader('Content-Type');
         if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) {
-            $this->uri = $sourceurl;
-            $this->type = $type;
-            $this->body = $body;
-            return true;
+            return $this->init($sourceurl, $type, $body);
         } else {
             common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl");
             throw new FeedSubUnrecognizedTypeException($type);
         }
     }
 
+    /**
+     * Parse a feed body and, if it is a usable feed, store it on this object.
+     *
+     * On success sets $this->uri, $this->type, $this->feed (the parsed
+     * DOMDocument) and $this->root (the first <channel> found under an <rss>
+     * document element, or the <feed> document element itself).
+     *
+     * Nothing is assigned until the whole document has been validated, so a
+     * failed call leaves any previously stored values untouched.
+     *
+     * @param string $sourceurl URL the body came from; also used as the exception message
+     * @param string $type      content type to record
+     * @param string $body      XML source text
+     *
+     * @return string the stored feed URI ($sourceurl)
+     *
+     * @throws FeedSubBadXmlException if the body is empty, is not parseable
+     *                                XML, or is not an rss/channel or feed document
+     */
+    function init($sourceurl, $type, $body)
+    {
+        // An empty body can never be a feed. Reject it up front: on PHP 8
+        // DOMDocument::loadXML() throws ValueError for an empty source
+        // instead of returning false.
+        if ($body === null || $body === false || $body === '') {
+            throw new FeedSubBadXmlException($sourceurl);
+        }
+
+        $feed = new DOMDocument();
+
+        // The body comes from a remote server; LIBXML_NONET keeps libxml
+        // from touching the network while loading it.
+        if (!$feed->loadXML($body, LIBXML_NONET)) {
+            throw new FeedSubBadXmlException($sourceurl);
+        }
+
+        // Looking for the "root" element: RSS channel or Atom feed
+        $el = $feed->documentElement;
+
+        if ($el->tagName === 'rss') {
+            $channels = $el->getElementsByTagName('channel');
+            if ($channels->length <= 0) {
+                throw new FeedSubBadXmlException($sourceurl);
+            }
+            $root = $channels->item(0);
+        } else if ($el->tagName === 'feed') {
+            $root = $el;
+        } else {
+            throw new FeedSubBadXmlException($sourceurl);
+        }
+
+        // Commit state only after every check has passed.
+        $this->uri = $sourceurl;
+        $this->type = $type;
+        $this->feed = $feed;
+        $this->root = $root;
+
+        return $this->uri;
+    }
+
     /**
      * @param string $url source URL, used to resolve relative links
      * @param string $body HTML body text
@@ -168,7 +215,13 @@ class FeedDiscovery
         }
 
         // Ok... now on to the links!
+        // Types listed in order of priority -- we'll prefer Atom if available.
         // @fixme merge with the munger link checks
+        $feeds = array(
+            'application/atom+xml' => false,
+            'application/rss+xml' => false,
+        );
+
         $nodes = $dom->getElementsByTagName('link');
         for ($i = 0; $i < $nodes->length; $i++) {
             $node = $nodes->item($i);
@@ -177,21 +230,25 @@ class FeedDiscovery
                 $type = $node->attributes->getNamedItem('type');
                 $href = $node->attributes->getNamedItem('href');
                 if ($rel && $type && $href) {
-                    $rel = trim($rel->value);
+                    $rel = array_filter(explode(" ", $rel->value));
                     $type = trim($type->value);
                     $href = trim($href->value);
 
-                    $feedTypes = array(
-                        'application/rss+xml',
-                        'application/atom+xml',
-                    );
-                    if (trim($rel) == 'alternate' && in_array($type, $feedTypes)) {
-                        return $this->resolveURI($href, $base);
+                    if (in_array('alternate', $rel) && array_key_exists($type, $feeds) && empty($feeds[$type])) {
+                        // Save the first feed found of each type...
+                        $feeds[$type] = $this->resolveURI($href, $base);
                     }
                 }
             }
         }
 
+        // Return the highest-priority feed found
+        foreach ($feeds as $type => $url) {
+            if ($url) {
+                return $url;
+            }
+        }
+
         return false;
     }