git.mxchange.org Git - friendica.git/blob - mod/parse_url.php
Some Bugfixes, and variable checks
[friendica.git] / mod / parse_url.php
1 <?php
2
3 require_once('library/HTML5/Parser.php');
4 require_once('library/HTMLPurifier.auto.php');
5
/**
 * array_walk() callback that turns a tag into a hashtag, in place.
 *
 * @param string $item tag text; rewritten to '#' followed by the original value
 * @param mixed  $k    array key handed over by array_walk(); not used
 */
function arr_add_hashes(&$item,$k) {
	$item = sprintf('#%s',$item);
}
9
/**
 * Turn a raw text fragment into a short plain-text excerpt.
 *
 * Fragments that mention '<script' are rejected outright; the rest is run
 * through strip_tags() and cut to 250 characters followed by '...'.
 * Truncation is multibyte-aware when mbstring is available, so a UTF-8
 * sequence is never cut in half.
 *
 * @param string $raw        text taken from a DOM node or attribute
 * @param int    $min_length fragments whose stripped length (in bytes) is below this are rejected
 * @return string the excerpt, or '' when the fragment is empty or rejected
 */
function parse_url_excerpt($raw,$min_length) {

	if(stristr($raw,'<script'))
		return '';

	$clean = strip_tags($raw);
	if($clean === '' || strlen($clean) < $min_length)
		return '';

	if(function_exists('mb_substr'))
		$clean = mb_substr($clean,0,250,'UTF-8');
	else
		$clean = substr($clean,0,250);

	return $clean . '...';
}

/**
 * Return an excerpt of the first <p> below $node that holds at least 100 bytes of text.
 *
 * @param DOMDocument|DOMElement $node node whose descendant paragraphs are inspected
 * @return string the excerpt, or '' when no paragraph qualifies
 */
function parse_url_paragraph_excerpt($node) {

	$paragraphs = $node->getElementsByTagName('p');
	if(! $paragraphs)
		return '';

	foreach($paragraphs as $paragraph) {
		$excerpt = parse_url_excerpt($paragraph->textContent,100);
		if($excerpt !== '')
			return $excerpt;
	}

	return '';
}

/**
 * Wrap an excerpt in a quote block: BBCode in text mode, HTML otherwise.
 *
 * @param string $text     excerpt to wrap
 * @param bool   $textmode true for BBCode output
 * @param string $br       line break used in text mode
 * @return string the wrapped excerpt
 */
function parse_url_quote($text,$textmode,$br) {

	if($textmode)
		return $br . $br . '[quote]' . $text . '[/quote]' . $br;

	return '<br /><br /><blockquote>' . $text . '</blockquote><br />';
}

/**
 * Build the image markup for a preview image URL found in a meta tag.
 *
 * The image is fetched and must load as a valid Photo; images wider or
 * taller than 300 are scaled via scaleImage(300) and the resulting
 * dimensions are written into the markup.
 *
 * @param string $src      image URL as found in the meta tag's content attribute
 * @param bool   $textmode true for BBCode output
 * @param string $br       line break used in text mode
 * @return string image markup, or '' when the URL is unusable or the image cannot be loaded
 */
function parse_url_image_markup($src,$textmode,$br) {

	if(stristr($src,'<script'))
		return '';

	$src = strip_tags($src);
	if($src === '')
		return '';

	$data = fetch_url($src);
	if(! $data)
		return '';

	require_once('include/Photo.php');
	$ph = new Photo($data);
	if(! $ph->is_valid())
		return '';

	// strip_tags() leaves quotes alone, so the URL must be escaped before it
	// is placed inside an HTML attribute. BBCode output is left untouched.
	$html_src = htmlspecialchars($src,ENT_QUOTES,'UTF-8');

	if($ph->getWidth() > 300 || $ph->getHeight() > 300) {
		$ph->scaleImage(300);
		$new_width = $ph->getWidth();
		$new_height = $ph->getHeight();
		if($textmode)
			return $br . $br . '[img=' . $new_width . 'x' . $new_height . ']' . $src . '[/img]';
		return '<br /><br /><img height="' . $new_height . '" width="' . $new_width . '" src="' . $html_src . '" alt="photo" />';
	}

	if($textmode)
		return $br . $br . '[img]' . $src . '[/img]';

	return '<br /><br /><img src="' . $html_src . '" alt="photo" />';
}

/**
 * Module handler: build a bookmark snippet for a URL and echo it.
 *
 * Request parameters (read from $_GET):
 *   - binurl       hex-encoded URL; takes precedence over url
 *   - url          plain URL
 *   - title        optional title supplied by the caller
 *   - description  optional description supplied by the caller
 *   - tags         optional CSV list of tags, each emitted with a '#' prefix
 *
 * Output is BBCode when the logged-in user has the 'system'/'plaintext'
 * pconfig set, HTML otherwise. Processing order:
 *   1. 'parse_link' hooks may supply the complete output.
 *   2. If URL, title and description were all supplied, they are used as is.
 *   3. Otherwise the page is fetched and title, an excerpt and (only if no
 *      excerpt was found in the body) meta description/image are scraped.
 *
 * Every exit path echoes its result and then calls killme(), which is
 * defined elsewhere and presumably terminates the request.
 *
 * NOTE(review): title and excerpt are only passed through strip_tags(); the
 * consumer is expected to treat the HTML-mode output as markup.
 *
 * @param App $a application object (not used here)
 */
function parse_url_content(&$a) {

	$title    = '';
	$text     = '';
	$image    = '';
	$str_tags = '';
	$domhead  = null;
	$dom      = null;

	$textmode = false;
	if(local_user() && intval(get_pconfig(local_user(),'system','plaintext')))
		$textmode = true;

	// Must be defined in both modes: it is used for the tag line and for
	// joining image and text further down.
	$br = (($textmode) ? "\n" : '<br />');

	// Collect the request parameters; missing or non-string values become ''.
	$get = array();
	foreach(array('binurl','url','title','description','tags') as $key)
		$get[$key] = ((isset($_GET[$key]) && is_string($_GET[$key])) ? $_GET[$key] : '');

	if($get['binurl'])
		$url = trim(hex2bin($get['binurl']));
	else
		$url = trim($get['url']);

	if($get['title'])
		$title = strip_tags(trim($get['title']));

	if($get['description'])
		$text = strip_tags(trim($get['description']));

	if($get['tags']) {
		// Explicit delimiter/enclosure/escape keep the historic defaults.
		$arr_tags = str_getcsv($get['tags'],',','"','\\');
		if(count($arr_tags)) {
			array_walk($arr_tags,'arr_add_hashes');
			$str_tags = $br . implode(' ',$arr_tags) . $br;
		}
	}

	logger('parse_url: ' . $url);

	if($textmode)
		$template = $br . '[bookmark=%s]%s[/bookmark]%s' . $br;
	else
		$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";

	// The URL as it may appear in the output: escaped for HTML, raw for BBCode.
	$out_url = (($textmode) ? $url : htmlspecialchars($url,ENT_QUOTES,'UTF-8'));

	$arr = array('url' => $url, 'text' => '');

	call_hooks('parse_link', $arr);

	if(strlen($arr['text'])) {
		echo $arr['text'];
		killme();
	}

	// Everything was supplied by the caller - no need to fetch the page.
	if($url && $title && $text) {

		$text  = parse_url_quote($text,$textmode,$br);
		$title = str_replace(array("\r","\n"),array('',''),$title);

		$result = sprintf($template,$out_url,($title) ? $title : $out_url,$text) . $str_tags;

		logger('parse_url (unparsed): returns: ' . $result);

		echo $result;
		killme();
	}

	if(! $url) {
		echo '';
		killme();
	}

	$s = fetch_url($url);

	if(! $s) {
		echo sprintf($template,$out_url,$out_url,'') . $str_tags;
		killme();
	}

	// Parse the <head> separately from the raw page; its meta tags are
	// consulted later if no excerpt is found in the body.
	$matches = array();
	if(preg_match('/\<head(.*?)\>(.*?)\<\/head\>/ism',$s,$matches)) {
		try {
			$domhead = HTML5_Parser::parse($matches[2]);
		} catch (DOMException $e) {
			logger('parse_url: parse error: ' . $e);
		}
		if($domhead)
			logger('parsed header');
	}

	// Crude title extraction from the raw page, used when the caller gave none.
	if(! $title) {
		$pos = strpos($s,'<title>');
		if($pos !== false) {
			$title = (string) substr($s,$pos + 7,64);
			$end = strpos($title,'<');
			if($end !== false)
				$title = strip_tags(substr($title,0,$end));
		}
	}

	$config = HTMLPurifier_Config::createDefault();
	$config->set('Cache.DefinitionImpl', null);
	$purifier = new HTMLPurifier($config);
	$s = $purifier->purify($s);

	try {
		$dom = HTML5_Parser::parse($s);
	} catch (DOMException $e) {
		logger('parse_url: parse error: ' . $e);
	}

	if(! $dom) {
		echo sprintf($template,$out_url,$out_url,'') . $str_tags;
		killme();
	}

	// Prefer the title of the parsed document, but never replace a known
	// title with an empty one.
	$items = $dom->getElementsByTagName('title');
	if($items) {
		foreach($items as $item) {
			$dom_title = strip_tags(trim($item->textContent));
			if($dom_title !== '')
				$title = $dom_title;
			break;
		}
	}

	if(! $text) {
		// First choice: a paragraph inside a div whose class hints at article content.
		$divs = $dom->getElementsByTagName('div');
		if($divs) {
			foreach($divs as $div) {
				$class = $div->getAttribute('class');
				if($class && (stristr($class,'article') || stristr($class,'content'))) {
					$text = parse_url_paragraph_excerpt($div);
					if($text)
						break;
				}
			}
		}

		// Second choice: any paragraph in the document.
		if(! $text)
			$text = parse_url_paragraph_excerpt($dom);
	}

	if(! $text) {
		// Last resort: description and preview image from the meta tags.
		// The first usable value of each kind wins.
		logger('parsing meta');
		$items = (is_object($domhead) ? $domhead->getElementsByTagName('meta') : null);
		if($items) {
			foreach($items as $item) {
				$property = $item->getAttribute('property');
				if(! $property)
					continue;

				if((! $text) && stristr($property,':description'))
					$text = parse_url_excerpt($item->getAttribute('content'),0);

				if(($image === '') && stristr($property,':image'))
					$image = parse_url_image_markup($item->getAttribute('content'),$textmode,$br);
			}
		}
	}

	if(strlen($text))
		$text = parse_url_quote($text,$textmode,$br);

	if($image)
		$text = $image . $br . $text;

	$title = str_replace(array("\r","\n"),array('',''),$title);

	$result = sprintf($template,$out_url,($title) ? $title : $out_url,$text) . $str_tags;

	logger('parse_url: returns: ' . $result);

	echo $result;
	killme();
}