3 require_once('library/HTML5/Parser.php');
4 require_once('library/HTMLPurifier.auto.php');
6 function parse_url_content(&$a) {
8 logger('parse_url: ' . $_GET['url']);
10 $url = trim(hex2bin($_GET['url']));
12 logger('parse_url: ' . $url);
16 $template = "<br /><a href=\"%s\" >%s</a>%s<br />";
19 $arr = array('url' => $url, 'text' => '');
21 call_hooks('parse_link', $arr);
23 if(strlen($arr['text'])) {
35 logger('parse_url: data: ' . $s, LOGGER_DATA);
38 echo sprintf($template,$url,$url,'');
42 if(strpos($s,'<title>')) {
43 $title = substr($s,strpos($s,'<title>')+7,64);
44 if(strpos($title,'<') !== false)
45 $title = strip_tags(substr($title,0,strpos($title,'<')));
48 $config = HTMLPurifier_Config::createDefault();
49 $config->set('Cache.DefinitionImpl', null);
51 $purifier = new HTMLPurifier($config);
52 $s = $purifier->purify($s);
54 // logger('parse_url: purified: ' . $s, LOGGER_DATA);
56 $dom = @HTML5_Parser::parse($s);
59 echo sprintf($template,$url,$url,'');
63 $items = $dom->getElementsByTagName('title');
66 foreach($items as $item) {
67 $title = trim($item->textContent);
72 $divs = $dom->getElementsByTagName('div');
74 foreach($divs as $div) {
75 $class = $div->getAttribute('class');
76 if($class && (stristr($class,'article') || stristr($class,'content'))) {
77 $items = $div->getElementsByTagName('p');
79 foreach($items as $item) {
80 $text = $item->textContent;
81 if(stristr($text,'<script')) {
85 $text = strip_tags($text);
86 if(strlen($text) < 100) {
90 $text = substr($text,0,250) . '...' ;
101 $items = $dom->getElementsByTagName('p');
103 foreach($items as $item) {
104 $text = $item->textContent;
105 if(stristr($text,'<script'))
107 $text = strip_tags($text);
108 if(strlen($text) < 100) {
112 $text = substr($text,0,250) . '...' ;
119 $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
122 echo sprintf($template,$url,($title) ? $title : $url,$text);