git.mxchange.org Git - friendica.git/blobdiff - include/Scrape.php
fixed missing close parenthesis
[friendica.git] / include / Scrape.php
index e4f7a0878c71016332f7f28bee75b1f97bef8bf1..ff98992526314474b6dcc7aa20386a92f114c332 100644 (file)
@@ -5,12 +5,31 @@ require_once('library/HTML5/Parser.php');
 if(! function_exists('scrape_dfrn')) {
 function scrape_dfrn($url) {
 
+       $a = get_app();
+
        $ret = array();
+
+       logger('scrape_dfrn: url=' . $url);
+
        $s = fetch_url($url);
 
        if(! $s) 
                return $ret;
 
+       $headers = $a->get_curl_headers();
+       logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG);
+
+
+       $lines = explode("\n",$headers);
+       if(count($lines)) {
+               foreach($lines as $line) {                              
+                       // don't try and run feeds through the html5 parser
+                       if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+                               return ret;
+               }
+       }
+
+
        $dom = HTML5_Parser::parse($s);
 
        if(! $dom)
@@ -77,12 +96,31 @@ function validate_dfrn($a) {
 if(! function_exists('scrape_meta')) {
 function scrape_meta($url) {
 
+       $a = get_app();
+
        $ret = array();
+
+       logger('scrape_meta: url=' . $url);
+
        $s = fetch_url($url);
 
        if(! $s) 
                return $ret;
 
+       $headers = $a->get_curl_headers();
+       logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG);
+
+       $lines = explode("\n",$headers);
+       if(count($lines)) {
+               foreach($lines as $line) {                              
+                       // don't try and run feeds through the html5 parser
+                       if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+                               return ret;
+               }
+       }
+
+
+
        $dom = HTML5_Parser::parse($s);
 
        if(! $dom)
@@ -105,12 +143,27 @@ function scrape_meta($url) {
 if(! function_exists('scrape_vcard')) {
 function scrape_vcard($url) {
 
+       $a = get_app();
+
        $ret = array();
+
+       logger('scrape_vcard: url=' . $url);
+
        $s = fetch_url($url);
 
        if(! $s) 
                return $ret;
 
+       $headers = $a->get_curl_headers();
+       $lines = explode("\n",$headers);
+       if(count($lines)) {
+               foreach($lines as $line) {                              
+                       // don't try and run feeds through the html5 parser
+                       if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+                               return ret;
+               }
+       }
+
        $dom = HTML5_Parser::parse($s);
 
        if(! $dom)
@@ -142,12 +195,33 @@ function scrape_vcard($url) {
 if(! function_exists('scrape_feed')) {
 function scrape_feed($url) {
 
+       $a = get_app();
+
        $ret = array();
        $s = fetch_url($url);
 
        if(! $s) 
                return $ret;
 
+       $headers = $a->get_curl_headers();
+       logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG);
+
+       $lines = explode("\n",$headers);
+       if(count($lines)) {
+               foreach($lines as $line) {                              
+                       if(stristr($line,'content-type:')) {
+                               if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) {
+                                       $ret['feed_atom'] = $url;
+                                       return $ret;
+                               }
+                               if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
+                                       $ret['feed_rss'] = $url;
+                                       return ret;
+                               }
+                       }
+               }
+       }
+
        $dom = HTML5_Parser::parse($s);
 
        if(! $dom)