git.mxchange.org Git - friendica.git/blobdiff - include/Scrape.php
fix setting array values to config items
[friendica.git] / include / Scrape.php
index 2af02fff5e06b3040d6eba0d3ebc5b646257b38f..22725260091cac26f848183c4ac5f5d12e46e938 100644 (file)
@@ -30,8 +30,11 @@ function scrape_dfrn($url) {
                }
        }
 
-
-       $dom = HTML5_Parser::parse($s);
+       try {
+               $dom = HTML5_Parser::parse($s);
+       } catch (DOMException $e) {
+               logger('scrape_dfrn: parse error: ' . $e);
+       }
 
        if(! $dom)
                return $ret;
@@ -132,9 +135,11 @@ function scrape_meta($url) {
                }
        }
 
-
-
-       $dom = HTML5_Parser::parse($s);
+       try {
+               $dom = HTML5_Parser::parse($s);
+       } catch (DOMException $e) {
+               logger('scrape_meta: parse error: ' . $e);
+       }
 
        if(! $dom)
                return $ret;
@@ -177,7 +182,11 @@ function scrape_vcard($url) {
                }
        }
 
-       $dom = HTML5_Parser::parse($s);
+       try {
+               $dom = HTML5_Parser::parse($s);
+       } catch (DOMException $e) {
+               logger('scrape_vcard: parse error: ' . $e);
+       }
 
        if(! $dom)
                return $ret;
@@ -221,11 +230,16 @@ function scrape_feed($url) {
        $ret = array();
        $s = fetch_url($url);
 
-       if(! $s) 
+       $headers = $a->get_curl_headers();
+       $code = $a->get_curl_code();
+
+       logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG);
+
+       if(! $s) {
+               logger('scrape_feed: no data returned for ' . $url); 
                return $ret;
+       }
 
-       $headers = $a->get_curl_headers();
-       logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG);
 
        $lines = explode("\n",$headers);
        if(count($lines)) {
@@ -241,12 +255,23 @@ function scrape_feed($url) {
                                }
                        }
                }
+               // perhaps an RSS version 1 feed with a generic or incorrect content-type?
+               if(stristr($s,'</item>')) {
+                       $ret['feed_rss'] = $url;
+                       return $ret;
+               }
        }
 
-       $dom = HTML5_Parser::parse($s);
+       try {
+               $dom = HTML5_Parser::parse($s);
+       } catch (DOMException $e) {
+               logger('scrape_feed: parse error: ' . $e);
+       }
 
-       if(! $dom)
+       if(! $dom) {
+               logger('scrape_feed: failed to parse.');
                return $ret;
+       }
 
 
        $head = $dom->getElementsByTagName('base');
@@ -257,7 +282,7 @@ function scrape_feed($url) {
                }
        }
        if(! $basename)
-               $basename = substr($url,0,strrpos($url,'/')) . '/';
+               $basename = implode('/', array_slice(explode('/',$url),0,3)) . '/';
 
        $items = $dom->getElementsByTagName('link');
 
@@ -301,7 +326,7 @@ function scrape_feed($url) {
  *
  * PROBE_DIASPORA has a bias towards returning Diaspora information
  * while PROBE_NORMAL has a bias towards dfrn/zot - in the case where
- * an address (such as a Friendika address) supports more than one type
+ * an address (such as a Friendica address) supports more than one type
  * of network. 
  *
  */
@@ -356,6 +381,8 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                        $hcard = unamp($link['@attributes']['href']);
                                if($link['@attributes']['rel'] === 'http://webfinger.net/rel/profile-page')
                                        $profile = unamp($link['@attributes']['href']);
+                               if($link['@attributes']['rel'] === 'http://portablecontacts.net/spec/1.0')
+                                       $poco = unamp($link['@attributes']['href']);
                                if($link['@attributes']['rel'] === 'http://joindiaspora.com/seed_location') {
                                        $diaspora_base = unamp($link['@attributes']['href']);
                                        $diaspora = true;
@@ -419,7 +446,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                                $phost = substr($url,strpos($url,'@')+1);
                                                $profile = 'http://' . $phost;
                                                // fix nick character range
-                                               $vcard = array('fn' => $name, 'nick' => $name, 'photo' => gravatar_img($url));
+                                               $vcard = array('fn' => $name, 'nick' => $name, 'photo' => avatar_img($url));
                                                $notify = 'smtp ' . random_string();
                                                $poll = 'email ' . random_string();
                                                $priority = 0;
@@ -430,10 +457,19 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                                                        $adr = imap_rfc822_parse_adrlist($x->to,'');
                                                if(isset($adr)) {
                                                        foreach($adr as $feadr) {
-                                                               if((strcasecmp($feadr->mailbox,$name) == 0) 
-                                                                       &&(strcasecmp($feadr->host,$phost) == 0) 
+                                                               if((strcasecmp($feadr->mailbox,$name) == 0)
+                                                                       &&(strcasecmp($feadr->host,$phost) == 0)
                                                                        && (strlen($feadr->personal))) {
-                                                                       $vcard['fn'] = notags($feadr->personal);
+
+                                                                       $personal = imap_mime_header_decode($feadr->personal);
+                                                                       $vcard['fn'] = "";
+                                                                       foreach($personal as $perspart)
+                                                                               if ($perspart->charset != "default")
+                                                                                       $vcard['fn'] .= iconv($perspart->charset, 'UTF-8//IGNORE', $perspart->text);
+                                                                               else
+                                                                                       $vcard['fn'] .= $perspart->text;
+
+                                                                       $vcard['fn'] = notags($vcard['fn']);
                                                                }
                                                        }
                                                }
@@ -533,7 +569,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
                if($twitter || ! $poll)
                        $check_feed = true;
-               if((! isset($vcard)) || (! $profile))
+               if((! isset($vcard)) || (! x($vcard,'fn')) || (! $profile))
                        $check_feed = true;
                if(($at_addr) && (! count($links)))
                        $check_feed = false;
@@ -541,7 +577,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                if($check_feed) {
 
                        $feedret = scrape_feed(($poll) ? $poll : $url);
-                       logger('probe_url: scrape_feed returns: ' . print_r($feedret,true), LOGGER_DATA);
+                       logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA);
                        if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) {
                                $poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss']));
                                if(! x($vcard)) 
@@ -557,7 +593,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        logger('probe_url: fetch feed: ' . $poll . ' returns: ' . $xml, LOGGER_DATA);
                        $a = get_app();
 
-                       logger('probe_url: scrape_feed: headers: ' . $a->get_curl_headers(), $LOGGER_DATA);
+                       logger('probe_url: scrape_feed: headers: ' . $a->get_curl_headers(), LOGGER_DATA);
 
                        $feed->set_raw_data($xml);
 
@@ -619,7 +655,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
                        }
 
                        if((! $vcard['photo']) && strlen($email))
-                               $vcard['photo'] = gravatar_img($email);
+                               $vcard['photo'] = avatar_img($email);
                        if($poll === $profile)
                                $lnk = $feed->get_permalink();
                        if(isset($lnk) && strlen($lnk))
@@ -648,7 +684,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
 
        if(! x($vcard,'photo')) {
                $a = get_app();
-               $vcard['photo'] = $a->get_baseurl() . '/images/default-profile.jpg' ; 
+               $vcard['photo'] = $a->get_baseurl() . '/images/person-175.jpg' ; 
        }
 
        if(! $profile)
@@ -671,6 +707,7 @@ function probe_url($url, $mode = PROBE_NORMAL) {
        $result['poll'] = $poll;
        $result['request'] = $request;
        $result['confirm'] = $confirm;
+       $result['poco'] = $poco;
        $result['photo'] = $vcard['photo'];
        $result['priority'] = $priority;
        $result['network'] = $network;