X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=mod%2Fparse_url.php;h=a238ecb2f915cabb071787f3e0156867611c7a27;hb=906addf865ccab2c98d5035fc6c1b10d43004dfe;hp=b3b42b6cb6f2ced24b4da9be57d3d16222116c9e;hpb=2e19af4b1e23691abc33e0b5a5af5adae36aec42;p=friendica.git

diff --git a/mod/parse_url.php b/mod/parse_url.php
index b3b42b6cb6..a238ecb2f9 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -1,15 +1,40 @@
 <?php
 
 require_once('library/HTML5/Parser.php');
+require_once('library/HTMLPurifier.auto.php');
 
+function arr_add_hashes(&$item,$k) { // array_walk() callback: $item is received by reference and rewritten in place; $k (the element key) is accepted but not used
+	$item = '#' . $item; // prepend '#' to the element
+}
 
 function parse_url_content(&$a) {
 
-	$url = trim($_GET['url']);
-
 	$text = null;
+	$str_tags = '';
+
+	if(x($_GET,'binurl'))
+		$url = trim(hex2bin($_GET['binurl']));
+	else
+		$url = trim($_GET['url']);
+
+	if($_GET['title'])
+		$title = strip_tags(trim($_GET['title']));
+
+	if($_GET['description'])
+		$text = strip_tags(trim($_GET['description']));
+
+	if($_GET['tags']) {
+		$arr_tags = str_getcsv($_GET['tags']);
+		if(count($arr_tags)) {
+			array_walk($arr_tags,'arr_add_hashes');
+			$str_tags = '<br />' . implode(' ',$arr_tags) . '<br />'; 		
+		}
+	}
+
+	logger('parse_url: ' . $url);
+
 
-	$template = "<a href=\"%s\" >%s</a>%s";
+	$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
 
 
 	$arr = array('url' => $url, 'text' => '');
@@ -21,23 +46,56 @@ function parse_url_content(&$a) {
 		killme();
 	}
 
-	if($url) 
+	if($url && $title && $text) {
+
+		$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
+		$title = str_replace(array("\r","\n"),array('',''),$title);
+
+		$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
+
+		logger('parse_url (unparsed): returns: ' . $result); 
+
+		echo $result;
+		killme();
+	}
+
+
+	if($url) {
 		$s = fetch_url($url);
-	else {
+	} else {
 		echo '';
 		killme();
 	}
 
+	logger('parse_url: data: ' . $s, LOGGER_DATA);
 
 	if(! $s) {
-		echo sprintf($template,$url,$url,'');
+		echo sprintf($template,$url,$url,'') . $str_tags;
 		killme();
 	}
 
+	if(! $title) {
+		if(strpos($s,'<title>')) {
+			$title = substr($s,strpos($s,'<title>')+7,64);
+			if(strpos($title,'<') !== false)
+				$title = strip_tags(substr($title,0,strpos($title,'<')));
+		}
+	}
+
+	$config = HTMLPurifier_Config::createDefault();
+	$config->set('Cache.DefinitionImpl', null);
+
+	$purifier = new HTMLPurifier($config);
+	$s = $purifier->purify($s);
+
+//	logger('parse_url: purified: ' . $s, LOGGER_DATA);
+
 	$dom = @HTML5_Parser::parse($s);
 
-	if(! $dom)
-		return $ret;
+	if(! $dom) {
+		echo sprintf($template,$url,$url,'') . $str_tags;
+		killme();
+	}
 
 	$items = $dom->getElementsByTagName('title');
 
@@ -49,44 +107,64 @@ function parse_url_content(&$a) {
 	}
 
 
-	$divs = $dom->getElementsByTagName('div');
-	if($divs) {
-		foreach($divs as $div) {
-			$class = $div->getAttribute('class');
-			if($class && stristr($class,'article')) {
-				$items = $div->getElementsByTagName('p');
-				if($items) {
-					foreach($items as $item) {
-						$text = $item->textContent;
-						$text = strip_tags($text);
-						if(strlen($text) < 100)
-							continue;
-						$text = substr($text,0,250) . '...' ;
-						break;
+	if(! $text) {
+		$divs = $dom->getElementsByTagName('div');
+		if($divs) {
+			foreach($divs as $div) {
+				$class = $div->getAttribute('class');
+				if($class && (stristr($class,'article') || stristr($class,'content'))) {
+					$items = $div->getElementsByTagName('p');
+					if($items) {
+						foreach($items as $item) {
+							$text = $item->textContent;
+							if(stristr($text,'<script')) {
+								$text = '';
+								continue;
+							}
+							$text = strip_tags($text);
+							if(strlen($text) < 100) {
+								$text = '';
+								continue;
+							}
+							$text = substr($text,0,250) . '...' ;
+							break;
+						}
 					}
 				}
+				if($text)
+					break;
 			}
 		}
-	}
 
-	if(! $text) {
-		$items = $dom->getElementsByTagName('p');
-		if($items) {
-			foreach($items as $item) {
-				$text = $item->textContent;
-				$text = strip_tags($text);
-				if(strlen($text) < 100)
-					continue;
-				$text = substr($text,0,250) . '...' ;
-				break;
+		if(! $text) {
+			$items = $dom->getElementsByTagName('p');
+			if($items) {
+				foreach($items as $item) {
+					$text = $item->textContent;
+					if(stristr($text,'<script'))
+						continue;
+					$text = strip_tags($text);
+					if(strlen($text) < 100) {
+						$text = '';
+						continue;
+					}
+					$text = substr($text,0,250) . '...' ;
+					break;
+				}
 			}
 		}
 	}
 
 	if(strlen($text)) {
-		$text = '<br />' . $text;
+		$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
 	}
 
-	echo sprintf($template,$url,$title,$text);
+	$title = str_replace(array("\r","\n"),array('',''),$title);
+
+	$result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
+
+	logger('parse_url: returns: ' . $result); 
+
+	echo $result;
 	killme();
-}
\ No newline at end of file
+}