3 require_once('library/HTML5/Parser.php');
4 require_once('library/HTMLPurifier.auto.php');
6 function parse_url_content(&$a) {
8 logger('parse_url: ' . $_GET['url']);
10 $url = trim(hex2bin($_GET['url']));
12 logger('parse_url: ' . $url);
16 $template = "<a href=\"%s\" >%s</a>\n%s";
19 $arr = array('url' => $url, 'text' => '');
21 call_hooks('parse_link', $arr);
23 if(strlen($arr['text'])) {
35 logger('parse_url: data: ' . $s, LOGGER_DATA);
38 echo sprintf($template,$url,$url,'');
42 if(strpos($s,'<title>')) {
43 $title = substr($s,strpos($s,'<title>')+7,64);
44 if(strpos($title,'<') !== false)
45 $title = substr($title,0,strpos($title,'<'));
48 $config = HTMLPurifier_Config::createDefault();
49 $config->set('Cache.DefinitionImpl', null);
51 $purifier = new HTMLPurifier($config);
52 $s = $purifier->purify($s);
54 $dom = @HTML5_Parser::parse($s);
57 echo sprintf($template,$url,$url,'');
61 $items = $dom->getElementsByTagName('title');
64 foreach($items as $item) {
65 $title = trim($item->textContent);
70 $divs = $dom->getElementsByTagName('div');
72 foreach($divs as $div) {
73 $class = $div->getAttribute('class');
74 if($class && stristr($class,'article')) {
75 $items = $div->getElementsByTagName('p');
77 foreach($items as $item) {
78 if($item->getElementsByTagName('script'))
80 $text = $item->textContent;
81 $text = strip_tags($text);
82 if(strlen($text) < 100)
84 $text = substr($text,0,250) . '...' ;
93 $items = $dom->getElementsByTagName('p');
95 foreach($items as $item) {
96 if($item->getElementsByTagName('script'))
98 $text = $item->textContent;
99 $text = strip_tags($text);
100 if(strlen($text) < 100)
102 $text = substr($text,0,250) . '...' ;
109 $text = '<br />' . $text;
112 echo sprintf($template,$url,($title) ? $title : $url,$text);