git.mxchange.org Git - friendica.git/blob - mod/parse_url.php
social bookmark API, oexchange enhancements
[friendica.git] / mod / parse_url.php
1 <?php
2
3 require_once('library/HTML5/Parser.php');
4 require_once('library/HTMLPurifier.auto.php');
5
/**
 * array_walk() callback: prefix a single tag with a hash sign.
 *
 * The element is modified in place through the reference.
 *
 * @param string $item array element to rewrite (by reference)
 * @param mixed  $k    array key supplied by array_walk(); not used
 */
function arr_add_hashes(&$item,$k) {
	$item = "#{$item}";
}
9
/**
 * Build the HTML bookmark snippet that this module echoes.
 *
 * The URL is HTML-escaped before it is placed in the href attribute (and
 * before it is used as fallback link text), so quotes or angle brackets in a
 * user supplied URL cannot break out of the markup.
 *
 * @param string $url      raw link target
 * @param string $title    link text; the escaped URL is used when this is empty
 * @param string $text     plain excerpt; wrapped in a blockquote when non-empty
 * @param string $str_tags ready-made tag markup appended after the link
 * @return string HTML fragment
 */
function parse_url_format($url,$title,$text,$str_tags) {

	$template = "<br /><a class=\"bookmark\" href=\"%s\" >%s</a>%s<br />";

	$safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

	// a title must stay on one line inside the link
	$title = str_replace(array("\r","\n"),array('',''),$title);

	if(strlen($text))
		$text = '<br /><br /><blockquote>' . $text . '</blockquote><br />';

	return sprintf($template,$safe_url,(($title) ? $title : $safe_url),$text) . $str_tags;
}

/**
 * Pick an excerpt from a list of <p> nodes.
 *
 * Returns the first paragraph that does not contain '<script' and whose
 * tag-stripped text is at least 100 bytes long, cut to 250 characters and
 * followed by '...'. Returns an empty string when no paragraph qualifies.
 *
 * @param mixed $paragraphs iterable of DOM nodes exposing textContent
 * @return string
 */
function parse_url_excerpt($paragraphs) {

	if(! $paragraphs)
		return '';

	foreach($paragraphs as $paragraph) {
		$candidate = $paragraph->textContent;

		// never fall through with script-bearing text, even on the last node
		if(stristr($candidate,'<script'))
			continue;

		$candidate = strip_tags($candidate);
		if(strlen($candidate) < 100)
			continue;

		// cut on character boundaries when mbstring is available so that a
		// multi-byte UTF-8 sequence is not split in half
		if(function_exists('mb_substr'))
			return mb_substr($candidate,0,250,'UTF-8') . '...';

		return substr($candidate,0,250) . '...';
	}

	return '';
}

/**
 * Module entry point: turn a URL into an HTML bookmark snippet and echo it.
 *
 * Request parameters read from $_GET:
 *   binurl       hex encoded URL (takes precedence over url)
 *   url          plain URL
 *   title        optional link title
 *   description  optional excerpt
 *   tags         optional comma separated tag list
 *
 * Processing order:
 *   1. The 'parse_link' hook is called; if it fills in 'text', that is echoed.
 *   2. If url, title and description were all supplied, the snippet is built
 *      from them without fetching anything.
 *   3. Otherwise the page is fetched, purified and parsed, and the title and
 *      an excerpt are taken from the document.
 *
 * Every exit path echoes its result and then calls killme(), which presumably
 * ends the request (defined outside this file).
 *
 * @param object $a application object; not used here
 */
function parse_url_content(&$a) {

	$title = '';
	$text = '';
	$str_tags = '';

	if(x($_GET,'binurl'))
		$url = trim(hex2bin($_GET['binurl']));
	else
		$url = ((isset($_GET['url'])) ? trim($_GET['url']) : '');

	if(x($_GET,'title'))
		$title = strip_tags(trim($_GET['title']));

	if(x($_GET,'description'))
		$text = strip_tags(trim($_GET['description']));

	if(x($_GET,'tags')) {
		$arr_tags = array();
		foreach(str_getcsv($_GET['tags'], ',', '"', '\\') as $tag) {
			// same sanitising as title/description; drop empty entries
			$tag = strip_tags(trim((string) $tag));
			if(strlen($tag))
				$arr_tags[] = $tag;
		}
		if(count($arr_tags)) {
			array_walk($arr_tags,'arr_add_hashes');
			$str_tags = '<br />' . implode(' ',$arr_tags) . '<br />';
		}
	}

	logger('parse_url: ' . $url);

	// let plugins provide the complete snippet themselves
	$arr = array('url' => $url, 'text' => '');

	call_hooks('parse_link', $arr);

	if(strlen($arr['text'])) {
		echo $arr['text'];
		killme();
	}

	// everything was supplied by the caller: no need to fetch the page
	if($url && $title && $text) {

		$result = parse_url_format($url,$title,$text,$str_tags);

		logger('parse_url (unparsed): returns: ' . $result);

		echo $result;
		killme();
	}

	if(! $url) {
		echo '';
		killme();
	}

	$s = fetch_url($url);

	logger('parse_url: data: ' . $s, LOGGER_DATA);

	// nothing fetched: fall back to a bare link
	if(! $s) {
		echo parse_url_format($url,'','',$str_tags);
		killme();
	}

	// grab the title from the raw markup before purifying; compare against
	// false so that a match at offset 0 is not mistaken for "not found"
	if(! $title) {
		$title_pos = stripos($s,'<title>');
		if($title_pos !== false) {
			$title = substr($s,$title_pos + 7,64);
			if(strpos($title,'<') !== false)
				$title = strip_tags(substr($title,0,strpos($title,'<')));
		}
	}

	$config = HTMLPurifier_Config::createDefault();
	$config->set('Cache.DefinitionImpl', null);

	$purifier = new HTMLPurifier($config);
	$s = $purifier->purify($s);

	// the '@' on the parser call is carried over from the original code
	$dom = @HTML5_Parser::parse($s);

	if(! $dom) {
		echo parse_url_format($url,'','',$str_tags);
		killme();
	}

	// prefer a <title> element found in the parsed document, but never
	// replace an existing title with an empty one
	$items = $dom->getElementsByTagName('title');

	if($items) {
		foreach($items as $item) {
			$dom_title = strip_tags(trim($item->textContent));
			if(strlen($dom_title))
				$title = $dom_title;
			break;
		}
	}

	if(! $text) {

		// first choice: a paragraph inside a div whose class mentions
		// 'article' or 'content'
		$divs = $dom->getElementsByTagName('div');
		if($divs) {
			foreach($divs as $div) {
				$class = $div->getAttribute('class');
				if($class && (stristr($class,'article') || stristr($class,'content'))) {
					$text = parse_url_excerpt($div->getElementsByTagName('p'));
					if($text)
						break;
				}
			}
		}

		// second choice: any paragraph in the document
		if(! $text)
			$text = parse_url_excerpt($dom->getElementsByTagName('p'));
	}

	$result = parse_url_format($url,$title,$text,$str_tags);

	logger('parse_url: returns: ' . $result);

	echo $result;
	killme();
}