git.mxchange.org Git - friendica.git/blobdiff - include/Scrape.php
Merge pull request #1139 from fabrixxm/notifix
[friendica.git] / include / Scrape.php
index 9c237916bc30bfa4ab7b23440fd23a9f89576c9b..99784af336120cf08e0a76ac306947712309de3e 100644 (file)
@@ -14,7 +14,7 @@ function scrape_dfrn($url) {
 
        $s = fetch_url($url);
 
-       if(! $s) 
+       if(! $s)
                return $ret;
 
        $headers = $a->get_curl_headers();
@@ -23,7 +23,7 @@ function scrape_dfrn($url) {
 
        $lines = explode("\n",$headers);
        if(count($lines)) {
-               foreach($lines as $line) {                              
+               foreach($lines as $line) {
                        // don't try and run feeds through the html5 parser
                        if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
                                return ret;
@@ -120,7 +120,7 @@ function scrape_meta($url) {
 
        $s = fetch_url($url);
 
-       if(! $s) 
+       if(! $s)
                return $ret;
 
        $headers = $a->get_curl_headers();
@@ -128,7 +128,7 @@ function scrape_meta($url) {
 
        $lines = explode("\n",$headers);
        if(count($lines)) {
-               foreach($lines as $line) {                              
+               foreach($lines as $line) {
                        // don't try and run feeds through the html5 parser
                        if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
                                return ret;
@@ -169,13 +169,13 @@ function scrape_vcard($url) {
 
        $s = fetch_url($url);
 
-       if(! $s) 
+       if(! $s)
                return $ret;
 
        $headers = $a->get_curl_headers();
        $lines = explode("\n",$headers);
        if(count($lines)) {
-               foreach($lines as $line) {                              
+               foreach($lines as $line) {
                        // don't try and run feeds through the html5 parser
                        if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
                                return ret;
@@ -236,14 +236,14 @@ function scrape_feed($url) {
        logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG);
 
        if(! $s) {
-               logger('scrape_feed: no data returned for ' . $url); 
+               logger('scrape_feed: no data returned for ' . $url);
                return $ret;
        }
 
 
        $lines = explode("\n",$headers);
        if(count($lines)) {
-               foreach($lines as $line) {                              
+               foreach($lines as $line) {
                        if(stristr($line,'content-type:')) {
                                if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) {
                                        $ret['feed_atom'] = $url;
@@ -282,7 +282,7 @@ function scrape_feed($url) {
                }
        }
        if(! $basename)
-               $basename = substr($url,0,strrpos($url,'/')) . '/';
+               $basename = implode('/', array_slice(explode('/',$url),0,3)) . '/';
 
        $items = $dom->getElementsByTagName('link');
 
@@ -299,7 +299,7 @@ function scrape_feed($url) {
                                if(! x($ret,'feed_rss'))
                                        $ret['feed_rss'] = $item->getAttribute('href');
                        }
-               }       
+               }
        }
 
        // Drupal and perhaps others only provide relative URL's. Turn them into absolute.
@@ -346,16 +346,18 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        $network = null;
        $diaspora = false;
        $diaspora_base = '';
-       $diaspora_guid = '';    
+       $diaspora_guid = '';
        $diaspora_key = '';
        $has_lrdd = false;
        $email_conversant = false;
 
-       $twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
+       // Twitter is deactivated since twitter closed its old API
+       //$twitter = ((strpos($url,'twitter.com') !== false) ? true : false);
+       $lastfm  = ((strpos($url,'last.fm/user') !== false) ? true : false);
 
        $at_addr = ((strpos($url,'@') !== false) ? true : false);
 
-       if(! $twitter) {
+       if((! $twitter) && (! $lastfm)) {
 
                if(strpos($url,'mailto:') !== false && $at_addr) {
                        $url = str_replace('mailto:','',$url);
@@ -393,7 +395,10 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                }
                                if($link['@attributes']['rel'] === 'diaspora-public-key') {
                                        $diaspora_key = base64_decode(unamp($link['@attributes']['href']));
-                                       $pubkey = rsatopem($diaspora_key);
+                                       if(strstr($diaspora_key,'RSA '))
+                                               $pubkey = rsatopem($diaspora_key);
+                                       else
+                                               $pubkey = $diaspora_key;
                                        $diaspora = true;
                                }
                        }
@@ -431,14 +436,17 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                        intval(local_user())
                                );
                                if(count($x) && count($r)) {
-                                   $mailbox = construct_mailbox_name($r[0]);
+                                       $mailbox = construct_mailbox_name($r[0]);
                                        $password = '';
                                        openssl_private_decrypt(hex2bin($r[0]['pass']),$password,$x[0]['prvkey']);
                                        $mbox = email_connect($mailbox,$r[0]['user'],$password);
+                                       if(! $mbox)
+                                               logger('probe_url: email_connect failed.');
                                        unset($password);
                                }
                                if($mbox) {
                                        $msgs = email_poll($mbox,$orig_url);
+                                       logger('probe_url: searching ' . $orig_url . ', ' . count($msgs) . ' messages found.', LOGGER_DEBUG);
                                        if(count($msgs)) {
                                                $addr = $orig_url;
                                                $network = NETWORK_MAIL;
@@ -446,15 +454,15 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                                $phost = substr($url,strpos($url,'@')+1);
                                                $profile = 'http://' . $phost;
                                                // fix nick character range
-                                               $vcard = array('fn' => $name, 'nick' => $name, 'photo' => gravatar_img($url));
+                                               $vcard = array('fn' => $name, 'nick' => $name, 'photo' => avatar_img($url));
                                                $notify = 'smtp ' . random_string();
                                                $poll = 'email ' . random_string();
                                                $priority = 0;
                                                $x = email_msg_meta($mbox,$msgs[0]);
-                                               if(stristr($x->from,$orig_url))
-                                                       $adr = imap_rfc822_parse_adrlist($x->from,'');
-                                               elseif(stristr($x->to,$orig_url))
-                                                       $adr = imap_rfc822_parse_adrlist($x->to,'');
+                                               if(stristr($x[0]->from,$orig_url))
+                                                       $adr = imap_rfc822_parse_adrlist($x[0]->from,'');
+                                               elseif(stristr($x[0]->to,$orig_url))
+                                                       $adr = imap_rfc822_parse_adrlist($x[0]->to,'');
                                                if(isset($adr)) {
                                                        foreach($adr as $feadr) {
                                                                if((strcasecmp($feadr->mailbox,$name) == 0)
@@ -478,7 +486,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                }
                        }
                }
-       }       
+       }
 
        if($mode == PROBE_NORMAL) {
                if(strlen($zot)) {
@@ -524,7 +532,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                }
                if(strpos($url,'@'))
                        $addr = str_replace('acct:', '', $url);
-       }                       
+       }
 
        if($network !== NETWORK_ZOT && $network !== NETWORK_DFRN && $network !== NETWORK_MAIL) {
                if($diaspora)
@@ -537,17 +545,24 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        $vcard = scrape_vcard($hcard);
 
                        // Google doesn't use absolute url in profile photos
-       
+
                        if((x($vcard,'photo')) && substr($vcard['photo'],0,1) == '/') {
                                $h = @parse_url($hcard);
                                if($h)
                                        $vcard['photo'] = $h['scheme'] . '://' . $h['host'] . $vcard['photo'];
                        }
-               
+
                        logger('probe_url: scrape_vcard: ' . print_r($vcard,true), LOGGER_DATA);
                }
 
-               if($twitter) {          
+               if($diaspora && $addr) {
+                       // Diaspora returns the name as the nick. As the nick will never be updated,
+                       // let's use the Diaspora nickname (the first part of the handle) as the nick instead
+                       $addr_parts = explode('@', $addr);
+                       $vcard['nick'] = $addr_parts[0];
+               }
+
+               /* if($twitter) {
                        logger('twitter: setup');
                        $tid = basename($url);
                        $tapi = 'https://api.twitter.com/1/statuses/user_timeline.rss';
@@ -556,9 +571,18 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        else
                                $poll = $tapi . '?screen_name=' . $tid;
                        $profile = 'http://twitter.com/#!/' . $tid;
-                       $vcard['photo'] = 'https://api.twitter.com/1/users/profile_image/' . $tid;
+                       //$vcard['photo'] = 'https://api.twitter.com/1/users/profile_image/' . $tid;
+                       $vcard['photo'] = 'https://api.twitter.com/1/users/profile_image?screen_name=' . $tid . '&size=bigger';
                        $vcard['nick'] = $tid;
-                       $vcard['fn'] = $tid . '@twitter';
+                       $vcard['fn'] = $tid;
+               } */
+
+               if($lastfm) {
+                       $profile = $url;
+                       $poll = str_replace(array('www.','last.fm/'),array('','ws.audioscrobbler.com/1.0/'),$url) . '/recenttracks.rss';
+                       $vcard['nick'] = basename($url);
+                       $vcard['fn'] = $vcard['nick'] . t(' on Last.fm');
+                       $network = NETWORK_FEED;
                }
 
                if(! x($vcard,'fn'))
@@ -567,6 +591,12 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
                $check_feed = false;
 
+               if(stristr($url,'tumblr.com') && (! stristr($url,'/rss'))) {
+                       $poll = $url . '/rss';
+                       $check_feed = true;
+                       // Will leave it to others to figure out how to grab the avatar, which is on the $url page in the open graph meta links
+               }
+
                if($twitter || ! $poll)
                        $check_feed = true;
                if((! isset($vcard)) || (! x($vcard,'fn')) || (! $profile))
@@ -580,24 +610,25 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA);
                        if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) {
                                $poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss']));
-                               if(! x($vcard)) 
+                               if(! x($vcard))
                                        $vcard = array();
                        }
 
                        if(x($feedret,'photo') && (! x($vcard,'photo')))
                                $vcard['photo'] = $feedret['photo'];
                        require_once('library/simplepie/simplepie.inc');
-                   $feed = new SimplePie();
+                       $feed = new SimplePie();
                        $xml = fetch_url($poll);
 
                        logger('probe_url: fetch feed: ' . $poll . ' returns: ' . $xml, LOGGER_DATA);
                        $a = get_app();
 
-                       logger('probe_url: scrape_feed: headers: ' . $a->get_curl_headers(), $LOGGER_DATA);
+                       logger('probe_url: scrape_feed: headers: ' . $a->get_curl_headers(), LOGGER_DATA);
 
-                       $feed->set_raw_data($xml);
+                       // Don't try and parse an empty string
+                       $feed->set_raw_data(($xml) ? $xml : '<?xml version="1.0" encoding="utf-8" ?><xml></xml>');
 
-                   $feed->init();
+                       $feed->init();
                        if($feed->error())
                                logger('probe_url: scrape_feed: Error parsing XML: ' . $feed->error());
 
@@ -606,7 +637,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                $vcard['photo'] = $feed->get_image_url();
                        $author = $feed->get_author();
 
-                       if($author) {                   
+                       if($author) {
                                $vcard['fn'] = unxmlify(trim($author->get_name()));
                                if(! $vcard['fn'])
                                        $vcard['fn'] = trim(unxmlify($author->get_email()));
@@ -617,18 +648,18 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                        $profile = trim(unxmlify($author->get_link()));
                                if(! $vcard['photo']) {
                                        $rawtags = $feed->get_feed_tags( SIMPLEPIE_NAMESPACE_ATOM_10, 'author');
-                               if($rawtags) {
+                                       if($rawtags) {
                                                $elems = $rawtags[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10];
                                                if((x($elems,'link')) && ($elems['link'][0]['attribs']['']['rel'] === 'photo'))
                                                        $vcard['photo'] = $elems['link'][0]['attribs']['']['href'];
-                               }
+                                       }
                                }
                        }
                        else {
                                $item = $feed->get_item(0);
                                if($item) {
                                        $author = $item->get_author();
-                                       if($author) {                   
+                                       if($author) {
                                                $vcard['fn'] = trim(unxmlify($author->get_name()));
                                                if(! $vcard['fn'])
                                                        $vcard['fn'] = trim(unxmlify($author->get_email()));
@@ -645,22 +676,27 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                        }
                                        if(! $vcard['photo']) {
                                                $rawtags = $item->get_item_tags( SIMPLEPIE_NAMESPACE_ATOM_10, 'author');
-                                       if($rawtags) {
+                                               if($rawtags) {
                                                        $elems = $rawtags[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10];
                                                        if((x($elems,'link')) && ($elems['link'][0]['attribs']['']['rel'] === 'photo'))
                                                                $vcard['photo'] = $elems['link'][0]['attribs']['']['href'];
-                                       }
+                                               }
                                        }
                                }
                        }
 
                        if((! $vcard['photo']) && strlen($email))
-                               $vcard['photo'] = gravatar_img($email);
+                               $vcard['photo'] = avatar_img($email);
                        if($poll === $profile)
                                $lnk = $feed->get_permalink();
                        if(isset($lnk) && strlen($lnk))
-                               $profile = $lnk;        
+                               $profile = $lnk;
 
+                       if(! $network) {
+                               $network = NETWORK_FEED;
+                               // If it is a feed, don't take the author name as feed name
+                               unset($vcard['fn']);
+                       }
                        if(! (x($vcard,'fn')))
                                $vcard['fn'] = notags($feed->get_title());
                        if(! (x($vcard,'fn')))
@@ -675,8 +711,6 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                if(strpos($vcard['nick'],' '))
                                        $vcard['nick'] = trim(substr($vcard['nick'],0,strpos($vcard['nick'],' ')));
                        }
-                       if(! $network)
-                               $network = NETWORK_FEED;
                        if(! $priority)
                                $priority = 2;
                }
@@ -684,7 +718,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
        if(! x($vcard,'photo')) {
                $a = get_app();
-               $vcard['photo'] = $a->get_baseurl() . '/images/person-175.jpg' ; 
+               $vcard['photo'] = $a->get_baseurl() . '/images/person-175.jpg' ;
        }
 
        if(! $profile)
@@ -697,7 +731,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
        $vcard['fn'] = notags($vcard['fn']);
        $vcard['nick'] = str_replace(' ','',notags($vcard['nick']));
-               
+
        $result['name'] = $vcard['fn'];
        $result['nick'] = $vcard['nick'];
        $result['url'] = $profile;