X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=mod%2Fparse_url.php;h=27dac4d5d318565483a392f3a0a7170e2dae4868;hb=851264161f1b0e638a818a0ebfa80feeb46dfa44;hp=46c6b46e97998e3db59b542d11b3b15b937c8fe4;hpb=1d35d1e5a1c807ea391d7958657b13a6bb471a0b;p=friendica.git
diff --git a/mod/parse_url.php b/mod/parse_url.php
index 46c6b46e97..27dac4d5d3 100644
--- a/mod/parse_url.php
+++ b/mod/parse_url.php
@@ -3,17 +3,48 @@
require_once('library/HTML5/Parser.php');
require_once('library/HTMLPurifier.auto.php');
+function arr_add_hashes(&$item,$k) { // Rewrites $item in place (taken by reference); $k is accepted but never read. NOTE(review): the (value, key) signature looks shaped for an array_walk-style callback - confirm against the caller.
+ $item = '#' . $item; // Prepend a literal '#' to the current value via string concatenation.
+}
+
function parse_url_content(&$a) {
- logger('parse_url: ' . $_GET['url']);
+ $text = null;
+ $str_tags = '';
+
+ $textmode = false;
+ if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
+ $textmode = true;
+
+ if($textmode)
+ $br = (($textmode) ? "\n" : '
%s%s
";
+ if($textmode)
+ $template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
+ else
+ $template = "
%s%s
";
$arr = array('url' => $url, 'text' => '');
@@ -25,6 +56,25 @@ function parse_url_content(&$a) {
killme();
}
+
+ if($url && $title && $text) {
+
+ if($textmode)
+ $text = $br . $br . '[quote]' . $text . '[/quote]' . $br;
+ else
+ $text = '
' . $text . '
';
+
+ $title = str_replace(array("\r","\n"),array('',''),$title);
+
+ $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
+
+ logger('parse_url (unparsed): returns: ' . $result);
+
+ echo $result;
+ killme();
+ }
+
+
if($url) {
$s = fetch_url($url);
} else {
@@ -32,31 +82,49 @@ function parse_url_content(&$a) {
killme();
}
- logger('parse_url: data: ' . $s, LOGGER_DATA);
+// logger('parse_url: data: ' . $s, LOGGER_DATA);
if(! $s) {
- echo sprintf($template,$url,$url,'');
+ echo sprintf($template,$url,$url,'') . $str_tags;
killme();
}
- if(strpos($s,'')) {
- $title = substr($s,strpos($s,'')+7,64);
- if(strpos($title,'<') !== false)
- $title = strip_tags(substr($title,0,strpos($title,'<')));
+ $matches = '';
+ $c = preg_match('/\(.*?)\<\/head\>/ism',$s,$matches);
+ if($c) {
+// logger('parse_url: header: ' . $matches[2], LOGGER_DATA);
+ try {
+ $domhead = HTML5_Parser::parse($matches[2]);
+ } catch (DOMException $e) {
+ logger('scrape_dfrn: parse error: ' . $e);
+ }
+ if($domhead)
+ logger('parsed header');
+ }
+
+ if(! $title) {
+ if(strpos($s,'')) {
+ $title = substr($s,strpos($s,'')+7,64);
+ if(strpos($title,'<') !== false)
+ $title = strip_tags(substr($title,0,strpos($title,'<')));
+ }
}
$config = HTMLPurifier_Config::createDefault();
$config->set('Cache.DefinitionImpl', null);
-
$purifier = new HTMLPurifier($config);
$s = $purifier->purify($s);
-// logger('parse_url: purified: ' . $s, LOGGER_DATA);
+// logger('purify_output: ' . $s);
- $dom = @HTML5_Parser::parse($s);
+ try {
+ $dom = HTML5_Parser::parse($s);
+ } catch (DOMException $e) {
+ logger('scrape_dfrn: parse error: ' . $e);
+ }
if(! $dom) {
- echo sprintf($template,$url,$url,'');
+ echo sprintf($template,$url,$url,'') . $str_tags;
killme();
}
@@ -69,56 +137,128 @@ function parse_url_content(&$a) {
}
}
- $divs = $dom->getElementsByTagName('div');
- if($divs) {
- foreach($divs as $div) {
- $class = $div->getAttribute('class');
- if($class && (stristr($class,'article') || stristr($class,'content'))) {
- $items = $div->getElementsByTagName('p');
- if($items) {
- foreach($items as $item) {
- $text = $item->textContent;
- if(stristr($text,'