]> git.mxchange.org Git - friendica.git/blobdiff - include/Scrape.php
Merge remote-tracking branch 'upstream/develop' into 1504-unified-follow
[friendica.git] / include / Scrape.php
index ad9e030a39b83872500b7dc0c029ed65b1051299..ce18bb10333f5c5dddf573c609263512716a28e3 100644 (file)
@@ -263,11 +263,6 @@ function scrape_feed($url) {
        }
 
        try {
-               // Cleanup invalid HTML
-               $doc = new DOMDocument();
-               @$doc->loadHTML($s);
-                $s = $doc->saveHTML();
-
                $dom = HTML5_Parser::parse($s);
        } catch (DOMException $e) {
                logger('scrape_feed: parse error: ' . $e);
@@ -348,6 +343,12 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        if(! $url)
                return $result;
 
+       $result = Cache::get("probe_url:".$mode.":".$url);
+       if (!is_null($result)) {
+               $result = unserialize($result);
+               return $result;
+       }
+
        $network = null;
        $diaspora = false;
        $diaspora_base = '';
@@ -355,6 +356,13 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        $diaspora_key = '';
        $has_lrdd = false;
        $email_conversant = false;
+       $connectornetworks = false;
+       $appnet = false;
+
+       if (strpos($url,'twitter.com')) {
+               $connectornetworks = true;
+               $network = NETWORK_TWITTER;
+       }
 
        // Twitter is deactivated since twitter closed its old API
        //$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
@@ -362,7 +370,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
        $at_addr = ((strpos($url,'@') !== false) ? true : false);
 
-       if((! $twitter) && (! $lastfm)) {
+       if((!$appnet) && (!$lastfm) && !$connectornetworks) {
 
                if(strpos($url,'mailto:') !== false && $at_addr) {
                        $url = str_replace('mailto:','',$url);
@@ -406,13 +414,16 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                                $pubkey = $diaspora_key;
                                        $diaspora = true;
                                }
+                               if($link['@attributes']['rel'] === 'http://ostatus.org/schema/1.0/subscribe') {
+                                       $diaspora = false;
+                               }
                        }
 
                        // Status.Net can have more than one profile URL. We need to match the profile URL
                        // to a contact on incoming messages to prevent spam, and we won't know which one
                        // to match. So in case of two, one of them is stored as an alias. Only store URL's
                        // and not webfinger user@host aliases. If they've got more than two non-email style
-                       // aliases, let's hope we're lucky and get one that matches the feed author-uri because 
+                       // aliases, let's hope we're lucky and get one that matches the feed author-uri because
                        // otherwise we're screwed.
 
                        foreach($links as $link) {
@@ -427,6 +438,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                        }
                                }
                        }
+
+                       // If the profile is different from the url then the url is obviously an alias
+                       if (($alias == "") AND ($profile != "") AND !$at_addr AND (normalise_link($profile) != normalise_link($url)))
+                               $alias = $url;
                }
                elseif($mode == PROBE_NORMAL) {
 
@@ -501,8 +516,8 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                if($j) {
                                        $network = NETWORK_ZOT;
                                        $vcard   = array(
-                                               'fn'    => $j->fullname, 
-                                               'nick'  => $j->nickname, 
+                                               'fn'    => $j->fullname,
+                                               'nick'  => $j->nickname,
                                                'photo' => $j->photo
                                        );
                                        $profile  = $j->url;
@@ -544,6 +559,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        $network = NETWORK_DIASPORA;
                elseif($has_lrdd)
                        $network  = NETWORK_OSTATUS;
+
+               if(strpos($url,'@'))
+                       $addr = str_replace('acct:', '', $url);
+
                $priority = 0;
 
                if($hcard && ! $vcard) {
@@ -602,13 +621,16 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        // Will leave it to others to figure out how to grab the avatar, which is on the $url page in the open graph meta links
                }
 
-               if($twitter || ! $poll)
+               if($appnet || ! $poll)
                        $check_feed = true;
                if((! isset($vcard)) || (! x($vcard,'fn')) || (! $profile))
                        $check_feed = true;
                if(($at_addr) && (! count($links)))
                        $check_feed = false;
 
+               if ($connectornetworks)
+                       $check_feed = false;
+
                if($check_feed) {
 
                        $feedret = scrape_feed(($poll) ? $poll : $url);
@@ -734,6 +756,22 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        if(($network === NETWORK_FEED) && ($poll) && (! x($vcard,'fn')))
                $vcard['fn'] = $url;
 
+       if (($notify != "") AND ($poll != "")) {
+               $baseurl = matching($notify, $poll);
+
+               $baseurl2 = matching($baseurl, $profile);
+               if ($baseurl2 != "")
+                       $baseurl = $baseurl2;
+       }
+
+       if (($baseurl == "") AND ($notify != ""))
+               $baseurl = matching($profile, $notify);
+
+       if (($baseurl == "") AND ($poll != ""))
+               $baseurl = matching($profile, $poll);
+
+       $baseurl = rtrim($baseurl, "/");
+
        $vcard['fn'] = notags($vcard['fn']);
        $vcard['nick'] = str_replace(' ','',notags($vcard['nick']));
 
@@ -752,8 +790,37 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        $result['network'] = $network;
        $result['alias'] = $alias;
        $result['pubkey'] = $pubkey;
+       $result['baseurl'] = $baseurl;
 
        logger('probe_url: ' . print_r($result,true), LOGGER_DEBUG);
 
+       // Check whether this might be a Diaspora account
+       if (($result['network'] == NETWORK_FEED) OR ($result['addr'] == "")) {
+               require_once('include/bbcode.php');
+               $address = GetProfileUsername($url, "", true);
+               $result2 = probe_url($address, $mode);
+               if ($result2['network'] != "")
+                       $result = $result2;
+       }
+
+       Cache::set("probe_url:".$mode.":".$url,serialize($result));
+
        return $result;
 }
+
+/**
+ * Returns the longest common prefix of two strings.
+ *
+ * The comparison is byte-wise and case-sensitive and stops at the first
+ * position where the strings differ, or at the end of the shorter string.
+ *
+ * @param string $part1 First string
+ * @param string $part2 Second string
+ *
+ * @return string The leading part shared by both strings ("" if there is none)
+ */
+function matching($part1, $part2) {
+       $part1 = (string)$part1;
+       $part2 = (string)$part2;
+
+       $len = min(strlen($part1), strlen($part2));
+
+       // Only walk positions that exist in both strings. Reading one position
+       // past the shorter string combined with a loose comparison could treat a
+       // trailing "0" as equal to "nothing" on older PHP versions.
+       $i = 0;
+       while (($i < $len) AND ($part1[$i] === $part2[$i]))
+               $i++;
+
+       // The cast keeps the result a string even where substr() reports an
+       // empty slice as false.
+       return (string)substr($part1, 0, $i);
+}