X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=mod%2Fparse_url.php;h=27dac4d5d318565483a392f3a0a7170e2dae4868;hb=851264161f1b0e638a818a0ebfa80feeb46dfa44;hp=46c6b46e97998e3db59b542d11b3b15b937c8fe4;hpb=1d35d1e5a1c807ea391d7958657b13a6bb471a0b;p=friendica.git

diff --git a/mod/parse_url.php b/mod/parse_url.php
index 46c6b46e97..27dac4d5d3 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -3,17 +3,48 @@
 require_once('library/HTML5/Parser.php');
 require_once('library/HTMLPurifier.auto.php');
 
+function arr_add_hashes(&$item,$k) {
+	$item = '#' . $item;
+}
+
 function parse_url_content(&$a) {
 
-	logger('parse_url: ' . $_GET['url']);
+	$text = null;
+	$str_tags = '';
+
+	$textmode = false;
+	if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
+		$textmode = true;
+
+	// line break for the active output mode; must be defined in both modes
+	$br = (($textmode) ? "\n" : '<br />');
+
+	if(x($_GET,'binurl'))
+		$url = trim(hex2bin($_GET['binurl']));
+	else
+		$url = trim($_GET['url']);
 
-	$url = trim(hex2bin($_GET['url']));
+	if($_GET['title'])
+		$title = strip_tags(trim($_GET['title']));
+
+	if($_GET['description'])
+		$text = strip_tags(trim($_GET['description']));
+
+	if($_GET['tags']) {
+		$arr_tags = str_getcsv($_GET['tags']);
+		if(count($arr_tags)) {
+			array_walk($arr_tags,'arr_add_hashes');
+			$str_tags = $br . implode(' ',$arr_tags) . $br;
+		}
+	}
 
 	logger('parse_url: ' . $url);
 
-	$text = null;
 
-	$template = "<br /><a href=\"%s\" >%s</a>%s<br />";
+	if($textmode)
+		$template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
+	else
+		$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
 
 
 	$arr = array('url' => $url, 'text' => '');
@@ -25,6 +56,25 @@ function parse_url_content(&$a) {
 		killme();
 	}
 
+
+	if($url && $title && $text) {
+
+		if($textmode)
+			$text = $br . $br . '[quote]' . $text . '[/quote]' . $br;
+		else
+			$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
+
+		$title = str_replace(array("\r","\n"),array('',''),$title);
+
+		$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
+
+		logger('parse_url (unparsed): returns: ' . $result); 
+
+		echo $result;
+		killme();
+	}
+
+
 	if($url) {
 		$s = fetch_url($url);
 	} else {
@@ -32,31 +82,49 @@ function parse_url_content(&$a) {
 		killme();
 	}
 
-	logger('parse_url: data: ' . $s, LOGGER_DATA);
+//	logger('parse_url: data: ' . $s, LOGGER_DATA);
 
 	if(! $s) {
-		echo sprintf($template,$url,$url,'');
+		echo sprintf($template,$url,$url,'') . $str_tags;
 		killme();
 	}
 
-	if(strpos($s,'<title>')) {
-		$title = substr($s,strpos($s,'<title>')+7,64);
-		if(strpos($title,'<') !== false)
-			$title = strip_tags(substr($title,0,strpos($title,'<')));
+	$matches = '';
+	$c = preg_match('/\<head(.*?)\>(.*?)\<\/head\>/ism',$s,$matches);
+	if($c) {
+//		logger('parse_url: header: ' . $matches[2], LOGGER_DATA);
+		try {
+			$domhead = HTML5_Parser::parse($matches[2]);
+		} catch (DOMException $e) {
+			logger('parse_url: parse error: ' . $e);
+		}
+		if($domhead)
+			logger('parsed header');
+	}
+
+	if(! $title) {
+		if(strpos($s,'<title>')) {
+			$title = substr($s,strpos($s,'<title>')+7,64);
+			if(strpos($title,'<') !== false)
+				$title = strip_tags(substr($title,0,strpos($title,'<')));
+		}
 	}
 
 	$config = HTMLPurifier_Config::createDefault();
 	$config->set('Cache.DefinitionImpl', null);
-
 	$purifier = new HTMLPurifier($config);
 	$s = $purifier->purify($s);
 
-//	logger('parse_url: purified: ' . $s, LOGGER_DATA);
+//	logger('purify_output: ' . $s);
 
-	$dom = @HTML5_Parser::parse($s);
+	try {
+		$dom = HTML5_Parser::parse($s);
+	} catch (DOMException $e) {
+		logger('parse_url: parse error: ' . $e);
+	}
 
 	if(! $dom) {
-		echo sprintf($template,$url,$url,'');
+		echo sprintf($template,$url,$url,'') . $str_tags;
 		killme();
 	}
 
@@ -69,56 +137,128 @@ function parse_url_content(&$a) {
 		}
 	}
 
-	$divs = $dom->getElementsByTagName('div');
-	if($divs) {
-		foreach($divs as $div) {
-			$class = $div->getAttribute('class');
-			if($class && (stristr($class,'article') || stristr($class,'content'))) {
-				$items = $div->getElementsByTagName('p');
-				if($items) {
-					foreach($items as $item) {
-						$text = $item->textContent;
-						if(stristr($text,'<script')) {
-							$text = '';
-							continue;
-						}
-						$text = strip_tags($text);
-						if(strlen($text) < 100) {
-							$text = '';
-							continue;
+
+	if(! $text) {
+		$divs = $dom->getElementsByTagName('div');
+		if($divs) {
+			foreach($divs as $div) {
+				$class = $div->getAttribute('class');
+				if($class && (stristr($class,'article') || stristr($class,'content'))) {
+					$items = $div->getElementsByTagName('p');
+					if($items) {
+						foreach($items as $item) {
+							$text = $item->textContent;
+							if(stristr($text,'<script')) {
+								$text = '';
+								continue;
+							}
+							$text = strip_tags($text);
+							if(strlen($text) < 100) {
+								$text = '';
+								continue;
+							}
+							$text = substr($text,0,250) . '...' ;
+							break;
 						}
-						$text = substr($text,0,250) . '...' ;
-						break;
 					}
 				}
+				if($text)
+					break;
+			}
+		}
+
+		if(! $text) {
+			$items = $dom->getElementsByTagName('p');
+			if($items) {
+				foreach($items as $item) {
+					$text = $item->textContent;
+					if(stristr($text,'<script'))
+						continue;
+					$text = strip_tags($text);
+					if(strlen($text) < 100) {
+						$text = '';
+						continue;
+					}
+					$text = substr($text,0,250) . '...' ;
+					break;
+				}
 			}
-			if($text)
-				break;
 		}
 	}
 
 	if(! $text) {
-		$items = $dom->getElementsByTagName('p');
+		logger('parsing meta');
+		$items = (isset($domhead) && is_object($domhead) ? $domhead->getElementsByTagName('meta') : null);
 		if($items) {
 			foreach($items as $item) {
-				$text = $item->textContent;
-				if(stristr($text,'<script'))
-					continue;
-				$text = strip_tags($text);
-				if(strlen($text) < 100) {
-					$text = '';
-					continue;
+				$property = $item->getAttribute('property');
+				if($property && (stristr($property,':description'))) {
+
+					$text = $item->getAttribute('content');
+					if(stristr($text,'<script')) {
+						$text = '';
+						continue;
+					}
+					$text = strip_tags($text);
+
+
+					$text = substr($text,0,250) . '...' ;
+				}
+				if($property && (stristr($property,':image'))) {
+
+					$image = $item->getAttribute('content');
+					if(stristr($image,'<script')) {
+						$image = '';
+						continue;
+					}
+					$image = strip_tags($image);
+					
+					$i = fetch_url($image);
+					if($i) {
+						require_once('include/Photo.php');
+						$ph = new Photo($i);
+						if($ph->is_valid()) {
+							if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
+								$ph->scaleImage(300);
+								$new_width = $ph->getWidth();
+								$new_height = $ph->getHeight();
+								if($textmode)
+									$image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
+								else
+									$image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
+							}
+							else {
+								if($textmode) 
+									$image = $br . $br . '[img]' . $image . '[/img]';
+								else
+									$image = '<br /><br /><img src="' . $image . '" alt="photo" />';
+							}
+						}
+						else
+							$image = '';
+					
+					}
 				}
-				$text = substr($text,0,250) . '...' ;
-				break;
 			}
 		}
 	}
 
 	if(strlen($text)) {
-		$text = '<br /><br />' . $text;
+		if($textmode)
+			$text = $br .$br . '[quote]' . $text . '[/quote]' . $br ;
+		else
+			$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
+	}
+
+	if($image) {
+		$text = $image . $br . $text;
 	}
+	$title = str_replace(array("\r","\n"),array('',''),$title);
+
+	$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
+
+	logger('parse_url: returns: ' . $result); 
 
-	echo sprintf($template,$url,($title) ? $title : $url,$text);
+	echo $result;
 	killme();
 }