]> git.mxchange.org Git - friendica.git/blob - mod/parse_url.php
do a slightly better job at finding relevant content from scraping submitted links
[friendica.git] / mod / parse_url.php
1 <?php
2
3 require_once('library/HTML5/Parser.php');
4
/**
 * Module handler: scrape the page at $_GET['url'] and echo an HTML snippet
 * for it - a link labelled with the page title, optionally followed by a
 * short text excerpt.
 *
 * Excerpt selection: paragraphs inside the first <div> whose class contains
 * "article" are tried first; if none yields an excerpt, every <p> in the
 * document is tried.
 *
 * Every path ends by echoing a response and calling killme() (defined
 * elsewhere in the project; presumably it terminates the request).
 *
 * @param object $a  passed in by the caller; not used by this function
 */
function parse_url_content(&$a) {

        // The parameter may be missing, or an array when sent as url[]=...
        $url = ((isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '');

        if(! $url) {
                echo '';
                killme();
        }

        $s = fetch_url($url);

        // Nothing fetched: fall back to a bare link to the URL.
        if(! $s)
                parse_url_emit_link($url,'','');

        $dom = HTML5_Parser::parse($s);

        // Unparseable markup gets the same fallback as a failed fetch, so the
        // requester always receives a usable snippet.
        if(! $dom)
                parse_url_emit_link($url,'','');

        $title = '';
        $titles = $dom->getElementsByTagName('title');
        if($titles && $titles->length)
                $title = trim($titles->item(0)->textContent);

        $text = null;

        $divs = $dom->getElementsByTagName('div');
        if($divs) {
                foreach($divs as $div) {
                        $class = $div->getAttribute('class');
                        if($class && stristr($class,'article')) {
                                $text = parse_url_first_excerpt($div->getElementsByTagName('p'));
                                // Stop at the first div that yields an excerpt so
                                // later divs cannot overwrite it.
                                if($text !== null)
                                        break;
                        }
                }
        }

        if($text === null)
                $text = parse_url_first_excerpt($dom->getElementsByTagName('p'));

        parse_url_emit_link($url,$title,$text);
}

/**
 * Return an excerpt of the first paragraph holding at least 100 bytes of text.
 *
 * @param mixed $paragraphs  iterable list of DOM nodes (may be empty or falsy)
 * @return string|null       up to 250 characters followed by '...', or null
 *                           when no paragraph is long enough
 */
function parse_url_first_excerpt($paragraphs) {

        if(! $paragraphs)
                return null;

        foreach($paragraphs as $paragraph) {
                $candidate = trim(strip_tags($paragraph->textContent));
                if(strlen($candidate) < 100)
                        continue;

                // Cut on character boundaries when mbstring is available, so a
                // multi-byte UTF-8 sequence is never split in half.
                // NOTE(review): assumes the parser hands back UTF-8 text - confirm.
                if(function_exists('mb_substr'))
                        return mb_substr($candidate,0,250,'UTF-8') . '...';

                return substr($candidate,0,250) . '...';
        }

        return null;
}

/**
 * Echo the link snippet for $url and end the request via killme().
 *
 * The URL comes from the request and the title/text come from a remote page,
 * so all three are HTML-escaped before being placed into the markup.
 *
 * @param string      $url    link target
 * @param string      $title  link label; the URL is used when this is empty
 * @param string|null $text   optional excerpt, placed after the link on a new line
 */
function parse_url_emit_link($url,$title,$text) {

        $template = "<a href=\"%s\" >%s</a>%s";

        $safe_url = htmlspecialchars($url,ENT_QUOTES,'UTF-8');

        // htmlspecialchars() yields '' for invalid UTF-8; fall back to the URL
        // in that case too, so the link is never left without a label.
        $safe_title = htmlspecialchars((string) $title,ENT_QUOTES,'UTF-8');
        if(! strlen($safe_title))
                $safe_title = $safe_url;

        $safe_text = htmlspecialchars((string) $text,ENT_QUOTES,'UTF-8');
        if(strlen($safe_text))
                $safe_text = '<br />' . $safe_text;

        echo sprintf($template,$safe_url,$safe_title,$safe_text);
        killme();
}