X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2FScrape.php;h=bb42c3bdd3474946db9325ad4fb6846141c5571e;hb=8c5c07b2654f5ff28cea1a7012198f74c5b1af03;hp=cc50151657b763e505a03acb5b240bad45f77830;hpb=6348e70daa113e8b3203de8fbc919d08c90d972e;p=friendica.git

diff --git a/include/Scrape.php b/include/Scrape.php
index cc50151657..bb42c3bdd3 100644
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -2,24 +2,28 @@
 
 require_once('library/HTML5/Parser.php');
 
-if(! function_exists('attribute_contains')) {
-function attribute_contains($attr,$s) {
-	$a = explode(' ', $attr);
-	if(count($a) && in_array($s,$a))
-		return true;
-	return false;
-}}
-
-
 if(! function_exists('scrape_dfrn')) {
 function scrape_dfrn($url) {
 
+	$a = get_app();
+
 	$ret = array();
 	$s = fetch_url($url);
 
 	if(! $s) 
 		return $ret;
 
+	$headers = $a->get_curl_headers();
+	$lines = explode("\n",$headers);
+	if(count($lines)) {
+		foreach($lines as $line) {				
+			// don't try and run feeds through the html5 parser
+			if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+				return ret;
+		}
+	}
+
+
 	$dom = HTML5_Parser::parse($s);
 
 	if(! $dom)
@@ -31,8 +35,15 @@ function scrape_dfrn($url) {
 
 	foreach($items as $item) {
 		$x = $item->getAttribute('rel');
+		if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
+			$ret['feed_atom'] = $item->getAttribute('href');
 		if(substr($x,0,5) == "dfrn-")
 			$ret[$x] = $item->getAttribute('href');
+		if($x === 'lrdd') {
+			$decoded = urldecode($item->getAttribute('href'));
+			if(preg_match('/acct:([^@]*)@/',$decoded,$matches))
+				$ret['nick'] = $matches[1];
+		}
 	}
 
 	// Pull out hCard profile elements
@@ -48,7 +59,7 @@ function scrape_dfrn($url) {
 					$ret['photo'] = $x->getAttribute('src');
 				if(attribute_contains($x->getAttribute('class'),'key'))
 					$ret['key'] = $x->textContent;
-        		}
+			}
 		}
 	}
 
@@ -76,5 +87,141 @@ function validate_dfrn($a) {
 	return $errors;
 }}
 
+if(! function_exists('scrape_meta')) {
+/**
+ * Fetch a page and collect its DFRN <meta> elements.
+ *
+ * @param string $url address handed to fetch_url()
+ * @return array 'content' values keyed by 'name' for every meta name starting with
+ *               "dfrn-"; empty when the fetch fails, a feed is served or parsing fails
+ */
+function scrape_meta($url) {
+	$a = get_app();
+
+	$ret = array();
+	$s = fetch_url($url);
+
+	if(! $s)
+		return $ret;
+
+	// NOTE(review): presumably these are the response headers of the fetch above - confirm
+	$headers = $a->get_curl_headers();
+	foreach(explode("\n",$headers) as $line) {
+		// don't try and run feeds through the html5 parser
+		if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+			return $ret;
+	}
 
+	$dom = HTML5_Parser::parse($s);
+
+	if(! $dom)
+		return $ret;
+
+	// get DFRN meta elements
+	$items = $dom->getElementsByTagName('meta');
+	foreach($items as $item) {
+		$x = $item->getAttribute('name');
+		if(substr($x,0,5) == "dfrn-")
+			$ret[$x] = $item->getAttribute('content');
+	}
+
+	return $ret;
+}}
+
+
+if(! function_exists('scrape_vcard')) {
+/**
+ * Fetch a page and pull name, photo and nickname out of its hCard markup.
+ * @param string $url address handed to fetch_url()
+ * @return array may hold 'fn', 'photo' and 'nick'; empty when the fetch fails,
+ *               the response is an Atom/RSS feed or the page cannot be parsed
+ */
+function scrape_vcard($url) {
+	$a = get_app();
+
+	$ret = array();
+	$s = fetch_url($url);
+
+	if(! $s)
+		return $ret;
+
+	$headers = $a->get_curl_headers();
+	foreach(explode("\n",$headers) as $line) {
+		// don't try and run feeds through the html5 parser
+		if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+			return $ret;
+	}
 
+	$dom = HTML5_Parser::parse($s);
+
+	if(! $dom)
+		return $ret;
+
+	// Pull out hCard profile elements
+	$items = $dom->getElementsByTagName('*');
+	foreach($items as $item) {
+		if(attribute_contains($item->getAttribute('class'), 'vcard')) {
+			$level2 = $item->getElementsByTagName('*');
+			foreach($level2 as $x) {
+				$class = $x->getAttribute('class');
+				if(attribute_contains($class,'fn'))
+					$ret['fn'] = $x->textContent;
+				if(attribute_contains($class,'photo') || attribute_contains($class,'avatar'))
+					$ret['photo'] = $x->getAttribute('src');
+				if(attribute_contains($class,'nickname') || attribute_contains($class,'uid'))
+					$ret['nick'] = $x->textContent;
+			}
+		}
+	}
+
+	return $ret;
+}}
+
+
+if(! function_exists('scrape_feed')) {
+/**
+ * Work out the Atom and/or RSS feed addresses reachable from a URL.
+ * @param string $url address handed to fetch_url()
+ * @return array 'feed_atom' and/or 'feed_rss': $url itself when it is served as a feed, else
+ *               the href of matching <link rel="alternate"> elements; may be empty
+ */
+function scrape_feed($url) {
+	$a = get_app();
+	$ret = array();
+	$s = fetch_url($url);
+
+	if(! $s)
+		return $ret;
+
+	// a feed content-type header means $url is itself the feed
+	$headers = $a->get_curl_headers();
+	foreach(explode("\n",$headers) as $line) {
+		if(stristr($line,'content-type:')) {
+			if(stristr($line,'application/atom+xml')) {
+				$ret['feed_atom'] = $url;
+				return $ret;
+			}
+			if(stristr($line,'application/rss+xml')) {
+				$ret['feed_rss'] = $url;
+				return $ret;
+			}
+		}
+	}
+
+	$dom = HTML5_Parser::parse($s);
+
+	if(! $dom)
+		return $ret;
+
+	// get Atom and RSS link elements
+	$items = $dom->getElementsByTagName('link');
+	foreach($items as $item) {
+		$x = $item->getAttribute('rel');
+		if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
+			$ret['feed_atom'] = $item->getAttribute('href');
+		if(($x === 'alternate') && ($item->getAttribute('type') === 'application/rss+xml'))
+			$ret['feed_rss'] = $item->getAttribute('href');
+	}
+
+	return $ret;
+}}
\ No newline at end of file