require_once('library/HTML5/Parser.php');
-if(! function_exists('attribute_contains')) {
-function attribute_contains($attr,$s) {
- $a = explode(' ', $attr);
- if(count($a) && in_array($s,$a))
- return true;
- return false;
-}}
-
-
if(! function_exists('scrape_dfrn')) {
+/**
+ * Fetch $url and scrape DFRN-related data out of the returned HTML.
+ *
+ * Keys placed in the result include (not exhaustive): 'feed_atom',
+ * any link rel beginning with "dfrn-", 'nick' (taken from an lrdd
+ * "acct:" link), 'photo' and 'key'.
+ *
+ * An empty array is returned when nothing could be fetched or when the
+ * response headers declare an Atom/RSS feed, since feeds are not run
+ * through the HTML5 parser.
+ *
+ * @param string $url address of the page to fetch
+ * @return array scraped values keyed by name
+ */
function scrape_dfrn($url) {
+ $a = get_app();
+
$ret = array();
+
+ logger('scrape_dfrn: url=' . $url);
+
$s = fetch_url($url);
if(! $s)
return $ret;
+ $headers = $a->get_curl_headers();
+ logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG);
+
+
+ $lines = explode("\n",$headers);
+ if(count($lines)) {
+ foreach($lines as $line) {
+ // don't try and run feeds through the html5 parser
+ if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+ return $ret;
+ }
+ }
+
+
$dom = HTML5_Parser::parse($s);
if(! $dom)
foreach($items as $item) {
$x = $item->getAttribute('rel');
+ if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
+ $ret['feed_atom'] = $item->getAttribute('href');
if(substr($x,0,5) == "dfrn-")
$ret[$x] = $item->getAttribute('href');
+ if($x === 'lrdd') {
+ $decoded = urldecode($item->getAttribute('href'));
+ if(preg_match('/acct:([^@]*)@/',$decoded,$matches))
+ $ret['nick'] = $matches[1];
+ }
}
// Pull out hCard profile elements
$ret['photo'] = $x->getAttribute('src');
if(attribute_contains($x->getAttribute('class'),'key'))
$ret['key'] = $x->textContent;
- }
+ }
}
}
if(! function_exists('scrape_meta')) {
+/**
+ * Fetch $url and parse the response as HTML5 for scraping.
+ *
+ * An empty array is returned when nothing could be fetched, when the
+ * response headers declare an Atom/RSS feed (feeds are not run through
+ * the HTML5 parser), or when the document cannot be parsed.
+ *
+ * @param string $url address of the page to fetch
+ * @return array scraped values keyed by name
+ */
function scrape_meta($url) {
+ $a = get_app();
+
$ret = array();
+
+ logger('scrape_meta: url=' . $url);
+
$s = fetch_url($url);
if(! $s)
return $ret;
+ $headers = $a->get_curl_headers();
+ logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG);
+
+ $lines = explode("\n",$headers);
+ if(count($lines)) {
+ foreach($lines as $line) {
+ // don't try and run feeds through the html5 parser
+ if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml'))))
+ return $ret;
+ }
+ }
+
+
+
$dom = HTML5_Parser::parse($s);
if(! $dom)
return $ret;
}}
+
+
+if(! function_exists('scrape_vcard')) {
+/**
+ * Fetch $url and collect hCard profile fields from the returned HTML.
+ *
+ * Every element whose class list contains "vcard" is searched; for each
+ * of its descendants the following keys may be set (later matches
+ * overwrite earlier ones):
+ *   'fn'    - text of an element classed "fn"
+ *   'photo' - src attribute of an element classed "photo" or "avatar"
+ *   'nick'  - text of an element classed "nickname" or "uid"
+ *
+ * An empty array is returned when nothing could be fetched, when the
+ * response headers declare an Atom/RSS feed, or when parsing fails.
+ *
+ * Relies on attribute_contains() being defined elsewhere.
+ *
+ * @param string $url address of the page to fetch
+ * @return array scraped hCard values keyed by name
+ */
+function scrape_vcard($url) {
+
+	$a = get_app();
+
+	$ret = array();
+
+	logger('scrape_vcard: url=' . $url);
+
+	$s = fetch_url($url);
+
+	if(! $s)
+		return $ret;
+
+	// don't try and run feeds through the html5 parser
+	// (headers are read from the app object right after the fetch)
+	$headers = $a->get_curl_headers();
+	foreach(explode("\n",$headers) as $line) {
+		if(stristr($line,'content-type:')
+			&& (stristr($line,'application/atom+xml') || stristr($line,'application/rss+xml')))
+			return $ret;
+	}
+
+	$dom = HTML5_Parser::parse($s);
+
+	if(! $dom)
+		return $ret;
+
+	// Pull out hCard profile elements
+
+	$items = $dom->getElementsByTagName('*');
+	foreach($items as $item) {
+		if(! attribute_contains($item->getAttribute('class'), 'vcard'))
+			continue;
+
+		$level2 = $item->getElementsByTagName('*');
+		foreach($level2 as $x) {
+			// look the class attribute up once per element
+			$class = $x->getAttribute('class');
+
+			if(attribute_contains($class,'fn'))
+				$ret['fn'] = $x->textContent;
+			if(attribute_contains($class,'photo') || attribute_contains($class,'avatar'))
+				$ret['photo'] = $x->getAttribute('src');
+			if(attribute_contains($class,'nickname') || attribute_contains($class,'uid'))
+				$ret['nick'] = $x->textContent;
+		}
+	}
+
+	return $ret;
+}}
+
+
+if(! function_exists('scrape_feed')) {
+/**
+ * Work out which feed(s) are available at, or advertised by, $url.
+ *
+ * If the response carries a content-type header and either that header
+ * or the body itself identifies an Atom or RSS feed, $url is returned
+ * directly as 'feed_atom' or 'feed_rss' (Atom is checked first).
+ * Otherwise the body is parsed as HTML5 and its
+ * <link rel="alternate"> elements are inspected; an Atom link sets
+ * 'feed_atom' and an RSS link sets 'feed_rss' (the last matching link
+ * of each type wins).
+ *
+ * An empty array is returned when nothing could be fetched or the
+ * document cannot be parsed.
+ *
+ * @param string $url address to probe
+ * @return array zero or more of 'feed_atom' / 'feed_rss' => URL
+ */
+function scrape_feed($url) {
+
+	$a = get_app();
+
+	$ret = array();
+	$s = fetch_url($url);
+
+	if(! $s)
+		return $ret;
+
+	$headers = $a->get_curl_headers();
+	logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG);
+
+	foreach(explode("\n",$headers) as $line) {
+		// body sniffing below only happens once a content-type header is seen
+		if(! stristr($line,'content-type:'))
+			continue;
+
+		if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) {
+			$ret['feed_atom'] = $url;
+			return $ret;
+		}
+		if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
+			$ret['feed_rss'] = $url;
+			return $ret;
+		}
+	}
+
+	$dom = HTML5_Parser::parse($s);
+
+	if(! $dom)
+		return $ret;
+
+	$items = $dom->getElementsByTagName('link');
+
+	// get Atom/RSS link elements
+
+	foreach($items as $item) {
+		if($item->getAttribute('rel') !== 'alternate')
+			continue;
+
+		$type = $item->getAttribute('type');
+		if($type === 'application/atom+xml')
+			$ret['feed_atom'] = $item->getAttribute('href');
+		if($type === 'application/rss+xml')
+			$ret['feed_rss'] = $item->getAttribute('href');
+	}
+
+	return $ret;
+}}
\ No newline at end of file