X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=include%2FScrape.php;h=e4f7a0878c71016332f7f28bee75b1f97bef8bf1;hb=2fc89deaeae0341aa12b6b811927e793011055d1;hp=cc50151657b763e505a03acb5b240bad45f77830;hpb=6348e70daa113e8b3203de8fbc919d08c90d972e;p=friendica.git diff --git a/include/Scrape.php b/include/Scrape.php index cc50151657..e4f7a0878c 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -2,15 +2,6 @@ require_once('library/HTML5/Parser.php'); -if(! function_exists('attribute_contains')) { -function attribute_contains($attr,$s) { - $a = explode(' ', $attr); - if(count($a) && in_array($s,$a)) - return true; - return false; -}} - - if(! function_exists('scrape_dfrn')) { function scrape_dfrn($url) { @@ -31,8 +22,15 @@ function scrape_dfrn($url) { foreach($items as $item) { $x = $item->getAttribute('rel'); + if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) + $ret['feed_atom'] = $item->getAttribute('href'); if(substr($x,0,5) == "dfrn-") $ret[$x] = $item->getAttribute('href'); + if($x === 'lrdd') { + $decoded = urldecode($item->getAttribute('href')); + if(preg_match('/acct:([^@]*)@/',$decoded,$matches)) + $ret['nick'] = $matches[1]; + } } // Pull out hCard profile elements @@ -48,7 +46,7 @@ function scrape_dfrn($url) { $ret['photo'] = $x->getAttribute('src'); if(attribute_contains($x->getAttribute('class'),'key')) $ret['key'] = $x->textContent; - } + } } } @@ -76,5 +74,96 @@ function validate_dfrn($a) { return $errors; }} +if(! function_exists('scrape_meta')) { +function scrape_meta($url) { + + $ret = array(); + $s = fetch_url($url); + + if(! $s) + return $ret; + + $dom = HTML5_Parser::parse($s); + + if(! $dom) + return $ret; + + $items = $dom->getElementsByTagName('meta'); + + // get DFRN link elements + + foreach($items as $item) { + $x = $item->getAttribute('name'); + if(substr($x,0,5) == "dfrn-") + $ret[$x] = $item->getAttribute('content'); + } + + return $ret; +}} + + +if(! function_exists('scrape_vcard')) { +function scrape_vcard($url) { + + $ret = array(); + $s = fetch_url($url); + + if(! $s) + return $ret; + + $dom = HTML5_Parser::parse($s); + + if(! $dom) + return $ret; + + // Pull out hCard profile elements + + $items = $dom->getElementsByTagName('*'); + foreach($items as $item) { + if(attribute_contains($item->getAttribute('class'), 'vcard')) { + $level2 = $item->getElementsByTagName('*'); + foreach($level2 as $x) { + if(attribute_contains($x->getAttribute('class'),'fn')) + $ret['fn'] = $x->textContent; + if((attribute_contains($x->getAttribute('class'),'photo')) + || (attribute_contains($x->getAttribute('class'),'avatar'))) + $ret['photo'] = $x->getAttribute('src'); + if((attribute_contains($x->getAttribute('class'),'nickname')) + || (attribute_contains($x->getAttribute('class'),'uid'))) + $ret['nick'] = $x->textContent; + } + } + } + + return $ret; +}} + +if(! function_exists('scrape_feed')) { +function scrape_feed($url) { + $ret = array(); + $s = fetch_url($url); + + if(! $s) + return $ret; + + $dom = HTML5_Parser::parse($s); + + if(! $dom) + return $ret; + + $items = $dom->getElementsByTagName('link'); + + // get Atom link elements + + foreach($items as $item) { + $x = $item->getAttribute('rel'); + if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) + $ret['feed_atom'] = $item->getAttribute('href'); + if(($x === 'alternate') && ($item->getAttribute('type') === 'application/rss+xml')) + $ret['feed_rss'] = $item->getAttribute('href'); + } + + return $ret; +}} \ No newline at end of file