3 require_once('library/HTML5/Parser.php');
4 require_once('library/HTMLPurifier.auto.php');
6 function arr_add_hashes(&$item,$k) {
10 function parse_url_content(&$a) {
16 if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
20 $br = (($textmode) ? "\n" : '<br />'); // line break: newline in plaintext mode, well-formed <br /> tag in HTML mode
23 $url = trim(hex2bin($_GET['binurl']));
25 $url = trim($_GET['url']);
28 $title = strip_tags(trim($_GET['title']));
30 if($_GET['description'])
31 $text = strip_tags(trim($_GET['description']));
34 $arr_tags = str_getcsv($_GET['tags']);
35 if(count($arr_tags)) {
36 array_walk($arr_tags,'arr_add_hashes');
37 $str_tags = $br . implode(' ',$arr_tags) . $br;
41 logger('parse_url: ' . $url);
45 $template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
47 $template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";
50 $arr = array('url' => $url, 'text' => '');
52 call_hooks('parse_link', $arr);
54 if(strlen($arr['text'])) {
60 if($url && $title && $text) {
63 $text = $br . $br . '[quote]' . $text . '[/quote]' . $br;
65 $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
67 $title = str_replace(array("\r","\n"),array('',''),$title);
69 $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
71 logger('parse_url (unparsed): returns: ' . $result);
85 // logger('parse_url: data: ' . $s, LOGGER_DATA);
88 echo sprintf($template,$url,$url,'') . $str_tags;
93 $c = preg_match('/\<head(.*?)\>(.*?)\<\/head\>/ism',$s,$matches);
95 // logger('parse_url: header: ' . $matches[2], LOGGER_DATA);
97 $domhead = HTML5_Parser::parse($matches[2]);
98 } catch (DOMException $e) {
99 logger('parse_url: parse error: ' . $e); // prefix matches the other parse_url log lines so the failure is attributed to this module
102 logger('parsed header');
106 if(strpos($s,'<title>') !== false) { // strpos() returns 0 for a match at the very start; compare against false explicitly
107 $title = substr($s,strpos($s,'<title>')+7,64);
108 if(strpos($title,'<') !== false)
109 $title = strip_tags(substr($title,0,strpos($title,'<')));
113 $config = HTMLPurifier_Config::createDefault();
114 $config->set('Cache.DefinitionImpl', null);
115 $purifier = new HTMLPurifier($config);
116 $s = $purifier->purify($s);
118 // logger('purify_output: ' . $s);
121 $dom = HTML5_Parser::parse($s);
122 } catch (DOMException $e) {
123 logger('parse_url: parse error: ' . $e); // prefix matches the other parse_url log lines so the failure is attributed to this module
127 echo sprintf($template,$url,$url,'') . $str_tags;
131 $items = $dom->getElementsByTagName('title');
134 foreach($items as $item) {
135 $title = trim($item->textContent);
142 $divs = $dom->getElementsByTagName('div');
144 foreach($divs as $div) {
145 $class = $div->getAttribute('class');
146 if($class && (stristr($class,'article') || stristr($class,'content'))) {
147 $items = $div->getElementsByTagName('p');
149 foreach($items as $item) {
150 $text = $item->textContent;
151 if(stristr($text,'<script')) {
155 $text = strip_tags($text);
156 if(strlen($text) < 100) {
160 $text = substr($text,0,250) . '...' ;
171 $items = $dom->getElementsByTagName('p');
173 foreach($items as $item) {
174 $text = $item->textContent;
175 if(stristr($text,'<script'))
177 $text = strip_tags($text);
178 if(strlen($text) < 100) {
182 $text = substr($text,0,250) . '...' ;
190 logger('parsing meta');
191 $items = (isset($domhead) && is_object($domhead) ? $domhead->getElementsByTagName('meta') : null);
193 foreach($items as $item) {
194 $property = $item->getAttribute('property');
195 if($property && (stristr($property,':description'))) {
197 $text = $item->getAttribute('content');
198 if(stristr($text,'<script')) {
202 $text = strip_tags($text);
205 $text = substr($text,0,250) . '...' ;
207 if($property && (stristr($property,':image'))) {
209 $image = $item->getAttribute('content');
210 if(stristr($image,'<script')) { // guard the image URL just read from the meta tag, not the description text
214 $image = strip_tags($image);
216 $i = fetch_url($image);
218 require_once('include/Photo.php');
219 // guess mimetype from headers or filename
220 $type = guess_image_type($image,true);
222 $ph = new Photo($i, $type);
223 if($ph->is_valid()) {
224 if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
225 $ph->scaleImage(300);
226 $new_width = $ph->getWidth();
227 $new_height = $ph->getHeight();
229 $image = $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $image . '[/img]';
231 $image = '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' .$image . '" alt="photo" />';
235 $image = $br . $br . '[img]' . $image . '[/img]';
237 $image = '<br /><br /><img src="' . $image . '" alt="photo" />';
251 $text = $br .$br . '[quote]' . $text . '[/quote]' . $br ;
253 $text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';
257 $text = $image . $br . $text;
259 $title = str_replace(array("\r","\n"),array('',''),$title);
261 $result = sprintf($template,$url,($title) ? $title : $url,$text) . $str_tags;
263 logger('parse_url: returns: ' . $result);