X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2FScrape.php;h=ff98992526314474b6dcc7aa20386a92f114c332;hb=b7292bc673bc7983b1fd933070bc9d6a0a155dbb;hp=e4f7a0878c71016332f7f28bee75b1f97bef8bf1;hpb=08d6fe5ae74cc223e4385344f8e0edfde4208a23;p=friendica.git diff --git a/include/Scrape.php b/include/Scrape.php index e4f7a0878c..ff98992526 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -5,12 +5,31 @@ require_once('library/HTML5/Parser.php'); if(! function_exists('scrape_dfrn')) { function scrape_dfrn($url) { + $a = get_app(); + $ret = array(); + + logger('scrape_dfrn: url=' . $url); + $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG); + + + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -77,12 +96,31 @@ function validate_dfrn($a) { if(! function_exists('scrape_meta')) { function scrape_meta($url) { + $a = get_app(); + $ret = array(); + + logger('scrape_meta: url=' . $url); + $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG); + + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -105,12 +143,27 @@ function scrape_meta($url) { if(! function_exists('scrape_vcard')) { function scrape_vcard($url) { + $a = get_app(); + $ret = array(); + + logger('scrape_vcard: url=' . $url); + $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -142,12 +195,33 @@ function scrape_vcard($url) { if(! function_exists('scrape_feed')) { function scrape_feed($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG); + + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + if(stristr($line,'content-type:')) { + if(stristr($line,'application/atom+xml') || stristr($s,'