]> git.mxchange.org Git - friendica.git/blob - include/Scrape.php
Merge branch 'friendika-master'
[friendica.git] / include / Scrape.php
1 <?php
2
3 require_once('library/HTML5/Parser.php');
4
if(! function_exists('scrape_dfrn')) {
/**
 * Fetch a page and collect DFRN link relations plus hCard profile fields.
 *
 * Keys that may be present in the result:
 *  - 'feed_atom' : href of a <link rel="alternate" type="application/atom+xml">
 *  - 'dfrn-*'    : href of every <link> whose rel attribute starts with "dfrn-"
 *  - 'nick'      : text between "acct:" and "@" in the url-decoded href of a
 *                  <link rel="lrdd">
 *  - 'fn', 'photo', 'key' : read from elements nested inside an element whose
 *                  class attribute matches 'vcard' (per attribute_contains())
 *
 * @param string $url  address handed to fetch_url()
 * @return array  associative array of discovered values; empty when
 *                fetch_url() yields nothing, when the response headers
 *                announce an Atom/RSS feed, or when the parser returns nothing
 */
function scrape_dfrn($url) {

	$a = get_app();

	$ret = array();
	$s = fetch_url($url);

	if(! $s)
		return $ret;

	// NOTE(review): presumably these are the headers of the fetch_url() call above - confirm
	$headers = $a->get_curl_headers();
	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:')
			&& (stristr($line,'application/atom+xml') || stristr($line,'application/rss+xml')))
			return $ret;
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get DFRN link elements

	$items = $dom->getElementsByTagName('link');
	foreach($items as $item) {
		$x = $item->getAttribute('rel');
		if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
			$ret['feed_atom'] = $item->getAttribute('href');
		if(substr($x,0,5) === "dfrn-")
			$ret[$x] = $item->getAttribute('href');
		if($x === 'lrdd') {
			// the href is url-encoded; the nickname is the part of the acct: URI before '@'
			$decoded = urldecode($item->getAttribute('href'));
			if(preg_match('/acct:([^@]*)@/',$decoded,$matches))
				$ret['nick'] = $matches[1];
		}
	}

	// Pull out hCard profile elements

	$items = $dom->getElementsByTagName('*');
	foreach($items as $item) {
		if(! attribute_contains($item->getAttribute('class'), 'vcard'))
			continue;

		// inspect every descendant of the vcard container; later matches overwrite earlier ones
		$level2 = $item->getElementsByTagName('*');
		foreach($level2 as $x) {
			$class = $x->getAttribute('class');
			if(attribute_contains($class,'fn'))
				$ret['fn'] = $x->textContent;
			if(attribute_contains($class,'photo'))
				$ret['photo'] = $x->getAttribute('src');
			if(attribute_contains($class,'key'))
				$ret['key'] = $x->textContent;
		}
	}

	return $ret;
}}
68
69
70
71
72
73
if(! function_exists('validate_dfrn')) {
/**
 * Count the required DFRN entries that are absent from a scrape result.
 *
 * Each of 'key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify' and
 * 'dfrn-poll' is checked with x(); every check that comes back falsy
 * adds one to the count.
 *
 * @param array $a  scrape result to check
 * @return int  number of failed checks (0 when all five pass)
 */
function validate_dfrn($a) {
	$required = array('key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify', 'dfrn-poll');

	$missing = 0;
	foreach($required as $field) {
		if(! x($a,$field))
			$missing ++;
	}

	return $missing;
}}
89
if(! function_exists('scrape_meta')) {
/**
 * Fetch a page and collect the DFRN values published in <meta> elements.
 *
 * Every <meta> whose name attribute starts with "dfrn-" contributes one
 * entry: the name is the key and the content attribute is the value.
 *
 * @param string $url  address handed to fetch_url()
 * @return array  associative array of dfrn-* meta values; empty when
 *                fetch_url() yields nothing, when the response headers
 *                announce an Atom/RSS feed, or when the parser returns nothing
 */
function scrape_meta($url) {

	$a = get_app();

	$ret = array();
	$s = fetch_url($url);

	if(! $s)
		return $ret;

	// NOTE(review): presumably these are the headers of the fetch_url() call above - confirm
	$headers = $a->get_curl_headers();
	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:')
			&& (stristr($line,'application/atom+xml') || stristr($line,'application/rss+xml')))
			return $ret;
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get DFRN meta elements

	$items = $dom->getElementsByTagName('meta');
	foreach($items as $item) {
		$x = $item->getAttribute('name');
		if(substr($x,0,5) === "dfrn-")
			$ret[$x] = $item->getAttribute('content');
	}

	return $ret;
}}
130
131
if(! function_exists('scrape_vcard')) {
/**
 * Fetch a page and collect hCard profile fields.
 *
 * Elements nested inside an element whose class attribute matches 'vcard'
 * (per attribute_contains()) are inspected, and may set:
 *  - 'fn'    : textContent of an element with class 'fn'
 *  - 'photo' : src attribute of an element with class 'photo' or 'avatar'
 *  - 'nick'  : textContent of an element with class 'nickname' or 'uid'
 *
 * @param string $url  address handed to fetch_url()
 * @return array  associative array of discovered values; empty when
 *                fetch_url() yields nothing, when the response headers
 *                announce an Atom/RSS feed, or when the parser returns nothing
 */
function scrape_vcard($url) {

	$a = get_app();

	$ret = array();
	$s = fetch_url($url);

	if(! $s)
		return $ret;

	// NOTE(review): presumably these are the headers of the fetch_url() call above - confirm
	$headers = $a->get_curl_headers();
	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:')
			&& (stristr($line,'application/atom+xml') || stristr($line,'application/rss+xml')))
			return $ret;
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// Pull out hCard profile elements

	$items = $dom->getElementsByTagName('*');
	foreach($items as $item) {
		if(! attribute_contains($item->getAttribute('class'), 'vcard'))
			continue;

		// inspect every descendant of the vcard container; later matches overwrite earlier ones
		$level2 = $item->getElementsByTagName('*');
		foreach($level2 as $x) {
			$class = $x->getAttribute('class');
			if(attribute_contains($class,'fn'))
				$ret['fn'] = $x->textContent;
			if(attribute_contains($class,'photo') || attribute_contains($class,'avatar'))
				$ret['photo'] = $x->getAttribute('src');
			if(attribute_contains($class,'nickname') || attribute_contains($class,'uid'))
				$ret['nick'] = $x->textContent;
		}
	}

	return $ret;
}}
179
180
if(! function_exists('scrape_feed')) {
/**
 * Work out the Atom and/or RSS feed address for a URL.
 *
 * If the response headers declare the document itself to be an Atom or RSS
 * feed, $url is reported as that feed and the body is not parsed. Otherwise
 * the body is parsed as HTML and <link rel="alternate"> elements with a
 * matching type attribute supply the feed addresses.
 *
 * @param string $url  address handed to fetch_url()
 * @return array  may contain 'feed_atom' and/or 'feed_rss'; empty when
 *                fetch_url() yields nothing, the parser returns nothing,
 *                or no feed reference is found
 */
function scrape_feed($url) {

	$a = get_app();

	$ret = array();
	$s = fetch_url($url);

	if(! $s)
		return $ret;

	// NOTE(review): presumably these are the headers of the fetch_url() call above - confirm
	$headers = $a->get_curl_headers();
	foreach(explode("\n",$headers) as $line) {
		if(! stristr($line,'content-type:'))
			continue;

		// the URL is itself a feed: report it directly instead of parsing it as HTML
		if(stristr($line,'application/atom+xml')) {
			$ret['feed_atom'] = $url;
			return $ret;
		}
		if(stristr($line,'application/rss+xml')) {
			$ret['feed_rss'] = $url;
			return $ret;
		}
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get Atom and RSS link elements

	$items = $dom->getElementsByTagName('link');
	foreach($items as $item) {
		if($item->getAttribute('rel') !== 'alternate')
			continue;

		$type = $item->getAttribute('type');
		if($type === 'application/atom+xml')
			$ret['feed_atom'] = $item->getAttribute('href');
		if($type === 'application/rss+xml')
			$ret['feed_rss'] = $item->getAttribute('href');
	}

	return $ret;
}}