git.mxchange.org Git - quix0rs-gnu-social.git/commitdiff
add hkit for hCard parsing
author Evan Prodromou <evan@status.net>
Fri, 26 Feb 2010 00:11:39 +0000 (19:11 -0500)
committer Evan Prodromou <evan@status.net>
Fri, 26 Feb 2010 00:11:39 +0000 (19:11 -0500)
plugins/OStatus/extlib/hkit/hcard.profile.php [new file with mode: 0644]
plugins/OStatus/extlib/hkit/hkit.class.php [new file with mode: 0644]

diff --git a/plugins/OStatus/extlib/hkit/hcard.profile.php b/plugins/OStatus/extlib/hkit/hcard.profile.php
new file mode 100644 (file)
index 0000000..6ec0dc8
--- /dev/null
@@ -0,0 +1,105 @@
+<?php
+       // hcard profile for hkit
+       
+       // These assignments configure the hKit instance that includes this file,
+       // which is why they write to $this.
+
+       // Class name of the root element that wraps a single hCard.
+       $this->root_class = 'vcard';
+
+       // Sub-property lists shared by more than one class below.
+       $hcard_name_parts = array(
+               'honorific-prefix',
+               'given-name',
+               'additional-name',
+               'family-name',
+               'honorific-suffix'
+       );
+       $hcard_type_value = array('type', 'value');
+
+       // Class names to extract. An array that directly follows a name lists
+       // the sub-properties that are looked up inside that class's element.
+       $this->classes = array(
+               'fn',           $hcard_name_parts,
+               'n',            $hcard_name_parts,
+               'adr',          array(
+                       'post-office-box',
+                       'extended-address',
+                       'street-address',
+                       'postal-code',
+                       'country-name',
+                       'type',
+                       'region',
+                       'locality'
+               ),
+               'label',
+               'bday',
+               'agent',
+               'nickname',
+               'photo',
+               'class',
+               'email',        $hcard_type_value,
+               'category',
+               'key',
+               'logo',
+               'mailer',
+               'note',
+               'org',          array('organization-name', 'organization-unit'),
+               'tel',          $hcard_type_value,
+               'geo',          array('latitude', 'longitude'),
+               'tz',
+               'uid',
+               'url',
+               'rev',
+               'role',
+               'sort-string',
+               'sound',
+               'title'
+       );
+
+       // The temporaries above live in the including scope; do not leave them behind.
+       unset($hcard_name_parts, $hcard_type_value);
+
+       // Classes that must only appear once per card.
+       $this->singles = array('fn');
+
+       // Classes that are required (not strictly enforced - give at least one!).
+       $this->required = array('fn');
+
+       // Per class: "TAG|attribute" pairs naming the attribute that carries the
+       // value when the class sits on an element with that tag name.
+       $this->att_map = array(
+               'fn'    => array('IMG|alt'),
+               'url'   => array(
+                       'A|href',
+                       'IMG|src',
+                       'AREA|href'
+               ),
+               'photo' => array('IMG|src'),
+               'bday'  => array('ABBR|title'),
+               'logo'  => array('IMG|src'),
+               'email' => array('A|href'),
+               'geo'   => array('ABBR|title')
+       );
+
+       // Per class: method of the including object that post-processes the raw value.
+       $this->callbacks = array(
+               'url'   => array($this, 'resolvePath'),
+               'photo' => array($this, 'resolvePath'),
+               'logo'  => array($this, 'resolvePath'),
+               'email' => array($this, 'resolveEmail')
+       );
+
+
+
+       /**
+        * Post-processor for the hcard profile: applies the implied-"n" rules
+        * to every parsed card.
+        *
+        * @param array $a list of parsed vcard arrays
+        * @return array the list with 'n' / 'fn' normalised on each card
+        */
+       function hKit_hcard_post($a)
+       {
+               if (!is_array($a)) return $a;
+
+               // Iterate by key instead of by reference so no dangling reference
+               // to the last element survives the loop.
+               foreach ($a as $i => $vcard){
+
+                       // A card without the expected structure can surface as a scalar;
+                       // the helpers below need an array, so leave such entries alone.
+                       if (!is_array($vcard)) continue;
+
+                       hKit_implied_n_optimization($vcard);
+                       hKit_implied_n_from_fn($vcard);
+
+                       $a[$i] = $vcard;
+               }
+
+               return $a;
+
+       }
+       
+       
+       /**
+        * Derives 'n' (given-name / family-name) from a plain-text two-word 'fn'.
+        *
+        * Nothing happens when 'fn' is missing or already structured, when 'n'
+        * is already present, or when 'fn' equals 'org' (the card names an
+        * organisation rather than a person).
+        *
+        * @param array $vcard parsed card, modified in place
+        * @return void
+        */
+       function hKit_implied_n_optimization(&$vcard)
+       {
+               if (!is_array($vcard)) return;
+               if (!array_key_exists('fn', $vcard) || !is_string($vcard['fn'])) return;
+               if (array_key_exists('n', $vcard)) return;
+               if (array_key_exists('org', $vcard) && $vcard['fn'] == $vcard['org']) return;
+
+               $fn     = $vcard['fn'];
+
+               // only names made of exactly two space-separated words qualify
+               if (sizeof(explode(' ', $fn)) != 2) return;
+
+               // array(pattern, index of given-name group, index of family-name group)
+               $patterns       = array();
+               $patterns[] = array('/^(\S+),\s*(\S{1})$/', 2, 1);              // Lastname, Initial
+               $patterns[] = array('/^(\S+)\s*(\S{1})\.*$/', 2, 1);    // Lastname Initial(.)
+               $patterns[] = array('/^(\S+),\s*(\S+)$/', 2, 1);                // Lastname, Firstname
+               $patterns[] = array('/^(\S+)\s*(\S+)$/', 1, 2);                 // Firstname Lastname
+
+               foreach ($patterns as $pattern){
+                       // Match in UTF-8 mode first so that \S{1} means one character and
+                       // not one byte; otherwise a multi-byte initial is misread as a surname.
+                       $found  = preg_match($pattern[0] . 'u', $fn, $matches);
+
+                       if ($found === false){
+                               // subject is not valid UTF-8: fall back to byte-wise matching
+                               $found  = preg_match($pattern[0], $fn, $matches);
+                       }
+
+                       if ($found === 1){
+                               $n                                      = array();
+                               $n['given-name']        = $matches[$pattern[1]];
+                               $n['family-name']       = $matches[$pattern[2]];
+                               $vcard['n']                     = $n;
+                               break;
+                       }
+               }
+       }
+       
+       
+       /**
+        * Handles a structured 'fn' (one that carries name sub-properties).
+        *
+        * The structure is copied to 'n' unless 'n' already exists or 'fn'
+        * equals 'org'; 'fn' itself is then reduced to its 'text' entry.
+        *
+        * @param array $vcard parsed card, modified in place
+        * @return void
+        */
+       function hKit_implied_n_from_fn(&$vcard)
+       {
+               if (!is_array($vcard)) return;
+               if (!array_key_exists('fn', $vcard) || !is_array($vcard['fn'])) return;
+
+               if (!array_key_exists('n', $vcard) && (!array_key_exists('org', $vcard) || $vcard['fn'] != $vcard['org'])){
+                       $vcard['n']             = $vcard['fn'];
+               }
+
+               // A structured fn does not always carry a 'text' entry; reading the
+               // key unguarded raised an undefined-index notice. The resulting
+               // value (null) is unchanged.
+               $vcard['fn']    = isset($vcard['fn']['text']) ? $vcard['fn']['text'] : null;
+       }
+
+?>
\ No newline at end of file
diff --git a/plugins/OStatus/extlib/hkit/hkit.class.php b/plugins/OStatus/extlib/hkit/hkit.class.php
new file mode 100644 (file)
index 0000000..c3a54cf
--- /dev/null
@@ -0,0 +1,475 @@
+<?php
+
+       /* 
+       
+       hKit Library for PHP5 - a generic library for parsing Microformats
+       Copyright (C) 2006  Drew McLellan
+
+       This library is free software; you can redistribute it and/or
+       modify it under the terms of the GNU Lesser General Public
+       License as published by the Free Software Foundation; either
+       version 2.1 of the License, or (at your option) any later version.
+
+       This library is distributed in the hope that it will be useful,
+       but WITHOUT ANY WARRANTY; without even the implied warranty of
+       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+       Lesser General Public License for more details.
+
+       You should have received a copy of the GNU Lesser General Public
+       License along with this library; if not, write to the Free Software
+       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+       
+       Author  
+               Drew McLellan - http://allinthehead.com/
+               
+       Contributors:
+               Scott Reynen - http://www.randomchaos.com/
+               
+       Version 0.5, 22-Jul-2006
+               fixed by-ref issue cropping up in PHP 5.0.5
+               fixed a bug with a@title
+               added support for new fn=n optimisation
+               added support for new a.include include-pattern
+       Version 0.4, 23-Jun-2006
+               prevented nested includes from causing infinite loops
+               returns false if URL can't be fetched
+               added pre-flight check for base support level
+               added deduping of once-only classnames
+               prevented accumulation of multiple 'value' values
+               tuned whitespace handling and treatment of DEL elements
+       Version 0.3, 21-Jun-2006
+               added post-processor callback method into profiles
+               fixed minor problems raised by hcard testsuite
+               added support for include-pattern
+               added support for td@headers pattern
+               added implied-n optimization into default hcard profile
+       Version 0.2, 20-Jun-2006
+               added class callback mechanism
+               added resolvePath & resolveEmail
+               added basic BASE support
+       Version 0.1.1, 19-Jun-2006 (different timezone, no time machine)
+               added external Tidy option
+       Version 0.1, 20-Jun-2006
+               initial release
+               
+       
+       
+       
+       */
+
+       /**
+        * Generic microformat parser.
+        *
+        * A "profile" file ("<profile>.profile.php") is included from inside
+        * loadProfile() and fills in the private settings below (root class,
+        * class list, attribute map, callbacks). Documents are parsed with
+        * SimpleXML and returned as nested arrays.
+        */
+       class hKit
+       {
+
+               public $tidy_mode       = 'proxy'; // 'proxy', 'exec', 'php' or 'none'
+               public $tidy_proxy      = 'http://cgi.w3.org/cgi-bin/tidy?forceXML=on&docAddr='; // required only for tidy_mode=proxy
+               public $tmp_dir         = '/path/to/writable/dir/'; // required only for tidy_mode=exec
+
+               // settings supplied by the profile file
+               private $root_class = '';
+               private $classes        = '';
+               private $singles        = '';
+               private $required       = '';
+               private $att_map        = '';
+               private $callbacks      = '';
+               private $processor      = '';
+
+               // state of the document currently being parsed
+               private $url            = '';
+               private $base           = '';
+               private $doc            = '';
+
+               // names of the properties a profile file may set
+               private static $profile_fields  = array('root_class', 'classes', 'singles', 'required', 'att_map', 'callbacks', 'processor');
+
+               // profile settings captured after the first include, keyed by profile name
+               private static $profile_cache   = array();
+
+
+               /**
+                * Pre-flight check: stops the script when a required function is missing.
+                *
+                * Declared as __construct() because a method named after the class is
+                * no longer treated as a constructor by PHP 8.
+                */
+               public function __construct()
+               {
+                       $required       = array('dom_import_simplexml', 'file_get_contents', 'simplexml_load_string');
+                       $missing        = array();
+
+                       foreach ($required as $f){
+                               if (!function_exists($f)){
+                                       $missing[]      = $f . '()';
+                               }
+                       }
+
+                       if (sizeof($missing) > 0)
+                               die('hKit error: these required functions are not available: <strong>' . implode(', ', $missing) . '</strong>');
+
+               }
+
+
+               /**
+                * Fetches a URL and parses the microformats described by a profile.
+                *
+                * A "#fragment" in the URL restricts parsing to the element with that id.
+                *
+                * @param string $profile profile name, e.g. 'hcard'
+                * @param string $url     address of the document
+                * @return array|false parsed data, or false when an argument is empty,
+                *                     the URL cannot be fetched or the markup cannot be parsed
+                */
+               public function getByURL($profile='', $url='')
+               {
+
+                       if ($profile=='' || $url == '') return false;
+
+                       $this->loadProfile($profile);
+
+                       $source         = $this->loadURL($url);
+
+                       if (!$source) return false;
+
+                       $tidy_xhtml     = $this->tidyThis($source);
+
+                       $fragment       = false;
+
+                       if (strrchr($url, '#')){
+                               // array_pop() takes its argument by reference, so it needs a real variable
+                               $url_parts      = explode('#', $url);
+                               $fragment       = array_pop($url_parts);
+                       }
+
+                       $doc            = $this->loadDoc($tidy_xhtml, $fragment);
+
+                       if (!is_array($doc)) return false;
+
+                       $s                      = $this->processNodes($doc, $this->classes);
+                       $s                      = $this->postProcess($profile, $s);
+
+                       return $s;
+               }
+
+               /**
+                * Parses the microformats described by a profile out of an XML string.
+                *
+                * @param string $profile   profile name, e.g. 'hcard'
+                * @param string $input_xml well-formed XML / XHTML markup
+                * @return array|false parsed data, or false when an argument is empty
+                *                     or the markup cannot be parsed
+                */
+               public function getByString($profile='', $input_xml='')
+               {
+                       if ($profile=='' || $input_xml == '') return false;
+
+                       $this->loadProfile($profile);
+
+                       $doc    = $this->loadDoc($input_xml);
+
+                       if (!is_array($doc)) return false;
+
+                       $s              = $this->processNodes($doc, $this->classes);
+                       $s              = $this->postProcess($profile, $s);
+
+                       return $s;
+
+               }
+
+               /**
+                * Extracts the values of $classes from each element in $items.
+                *
+                * $classes is a flat list of class names; an array directly following
+                * a name lists sub-properties, which are extracted recursively and
+                * stored next to a 'text' entry holding the element's own value.
+                *
+                * @param array|SimpleXMLElement $items          elements to scan
+                * @param array                  $classes        class list as described above
+                * @param bool                   $allow_includes whether to follow the include-pattern
+                * @return array list of per-item results when more than one item was
+                *               processed, otherwise the result for the last item
+                *               (array() when there were no items)
+                */
+               private function processNodes($items, $classes, $allow_includes=true){
+
+                       $out    = array();
+
+                       foreach($items as $item){
+                               $data   = array();
+
+                               for ($i=0; $i<sizeof($classes); $i++){
+
+                                       if (!is_array($classes[$i])){
+
+                                               $xpath                  = ".//*[contains(concat(' ',normalize-space(@class),' '),' " . $classes[$i] . " ')]";
+                                               $results                = $item->xpath($xpath);
+
+                                               if ($results){
+                                                       foreach ($results as $result){
+                                                               if (isset($classes[$i+1]) && is_array($classes[$i+1])){
+                                                                       // class with sub-properties
+                                                                       $nodes                          = $this->processNodes($results, $classes[$i+1]);
+                                                                       if (sizeof($nodes) > 0){
+                                                                               $nodes = array_merge(array('text'=>$this->getNodeValue($result, $classes[$i])), $nodes);
+                                                                               $data[$classes[$i]]     = $nodes;
+                                                                       }else{
+                                                                               $data[$classes[$i]]     = $this->getNodeValue($result, $classes[$i]);
+                                                                       }
+
+                                                               }else{
+                                                                       if (isset($data[$classes[$i]])){
+                                                                               if (is_array($data[$classes[$i]])){
+                                                                                       // is already an array - append
+                                                                                       $data[$classes[$i]][]   = $this->getNodeValue($result, $classes[$i]);
+
+                                                                               }else{
+                                                                                       // make it an array
+                                                                                       if ($classes[$i] == 'value'){ // unless it's the 'value' of a type/value pattern
+                                                                                               $data[$classes[$i]] .= $this->getNodeValue($result, $classes[$i]);
+                                                                                       }else{
+                                                                                               $old_val                        = $data[$classes[$i]];
+                                                                                               $data[$classes[$i]]     = array($old_val, $this->getNodeValue($result, $classes[$i]));
+                                                                                               $old_val                        = false;
+                                                                                       }
+                                                                               }
+                                                                       }else{
+                                                                               // set as normal value
+                                                                               $data[$classes[$i]]     = $this->getNodeValue($result, $classes[$i]);
+
+                                                                       }
+                                                               }
+
+                                                               // td@headers pattern
+                                                               if (strtoupper(dom_import_simplexml($result)->tagName)== "TD" && $result['headers']){
+                                                                       $include_ids    = explode(' ', (string) $result['headers']);
+                                                                       $doc                    = $this->doc;
+                                                                       foreach ($include_ids as $id){
+                                                                               $xpath                  = "//*[@id='$id']/..";
+                                                                               $includes               = $doc->xpath($xpath);
+                                                                               // xpath() does not return an array when the expression is
+                                                                               // invalid (e.g. an id containing a quote)
+                                                                               if (!$includes) continue;
+                                                                               foreach ($includes as $include){
+                                                                                       $tmp = $this->processNodes($include, $this->classes);
+                                                                                       if (is_array($tmp)) $data = array_merge($data, $tmp);
+                                                                               }
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                                       $result = false;
+                               }
+
+                               // include-pattern
+                               if ($allow_includes){
+                                       $xpath                  = ".//*[contains(concat(' ',normalize-space(@class),' '),' include ')]";
+                                       $results                = $item->xpath($xpath);
+
+                                       if ($results){
+                                               foreach ($results as $result){
+                                                       $tagName = strtoupper(dom_import_simplexml($result)->tagName);
+                                                       if ((($tagName == "OBJECT" && $result['data']) || ($tagName == "A" && $result['href']))
+                                                                       && preg_match('/\binclude\b/', (string) $result['class'])){
+                                                               $att            = ($tagName == "OBJECT" ? 'data' : 'href');
+                                                               $id                     = str_replace('#', '', (string) $result[$att]);
+                                                               $doc            = $this->doc;
+                                                               $xpath          = "//*[@id='$id']";
+                                                               $includes       = $doc->xpath($xpath);
+                                                               if (!$includes) continue;
+                                                               foreach ($includes as $include){
+                                                                       $include        = simplexml_load_string('<root1><root2>'.$include->asXML().'</root2></root1>'); // don't ask.
+                                                                       // includes are not followed recursively, to avoid infinite loops
+                                                                       $tmp            = $this->processNodes($include, $this->classes, false);
+                                                                       if (is_array($tmp)) $data = array_merge($data, $tmp);
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                               $out[]  = $data;
+                       }
+
+                       if (sizeof($out) > 1){
+                               return $out;
+                       }else if (isset($data)){
+                               return $data;
+                       }else{
+                               return array();
+                       }
+               }
+
+
+               /**
+                * Works out the value of one element for a given class name.
+                *
+                * Lookup order: attribute named by the profile's att_map, then
+                * OBJECT@data, IMG@alt, AREA@alt, @title (not on A), and finally the
+                * element's child nodes joined by spaces. The class's callback, if
+                * any, is applied, and whitespace is collapsed except inside PRE.
+                *
+                * @param SimpleXMLElement $node
+                * @param string           $className
+                * @return string|false the value; false for DEL elements
+                */
+               private function getNodeValue($node, $className)
+               {
+
+                       $tag_name       = strtoupper(dom_import_simplexml($node)->tagName);
+                       $s                      = false;
+
+                       // ignore DEL tags
+                       if ($tag_name == 'DEL') return $s;
+
+                       // look up att map values
+                       if (is_array($this->att_map) && array_key_exists($className, $this->att_map)){
+
+                               foreach ($this->att_map[$className] as $map){
+                                       // Compare the tag part exactly. The previous unanchored regex
+                                       // let "A" match "AREA|href" and "BR" match "ABBR|title".
+                                       $map_parts      = explode('|', $map);
+                                       if (sizeof($map_parts) > 1 && $map_parts[0] === $tag_name){
+                                               $s      = ''.$node[array_pop($map_parts)];
+                                       }
+                               }
+                       }
+
+                       // if nothing and OBJ, try data.
+                       if (!$s && $tag_name=='OBJECT' && $node['data'])        $s      = ''.$node['data'];
+
+                       // if nothing and IMG, try alt.
+                       if (!$s && $tag_name=='IMG' && $node['alt'])    $s      = ''.$node['alt'];
+
+                       // if nothing and AREA, try alt.
+                       if (!$s && $tag_name=='AREA' && $node['alt'])   $s      = ''.$node['alt'];
+
+                       //if nothing and not A, try title.
+                       if (!$s && $tag_name!='A' && $node['title'])    $s      = ''.$node['title'];
+
+
+                       // if nothing found, go with node text
+                       // (separator first: the reversed argument order was removed in PHP 8)
+                       if (!$s){
+                               $s      = implode(' ', array_filter($node->xpath('child::node()'), array($this, "filterBlankValues")));
+                       }
+
+                       // callbacks
+                       if (is_array($this->callbacks) && array_key_exists($className, $this->callbacks)){
+                               $s      = preg_replace_callback('/.*/', $this->callbacks[$className], $s, 1);
+                       }
+
+                       // trim and remove line breaks
+                       if ($tag_name != 'PRE'){
+                               $s      = trim(preg_replace('/[\r\n\t]+/', '', $s));
+                               $s      = trim(preg_replace('/(\s{2})+/', ' ', $s));
+                       }
+
+                       return $s;
+               }
+
+               /**
+                * array_filter() callback: keeps values that contain a word character.
+                *
+                * @param mixed $s value convertible to string
+                * @return int 1 to keep, 0 to drop
+                */
+               private function filterBlankValues($s){
+                       return preg_match("/\w+/", (string) $s);
+               }
+
+
+               /**
+                * Cleans fetched markup according to $tidy_mode.
+                *
+                * 'exec' runs the external tidy binary on a temp file in $tmp_dir,
+                * 'php' uses the tidy extension, anything else returns the source
+                * unchanged ('proxy' is handled when the URL is fetched).
+                *
+                * @param string $source raw markup
+                * @return string markup to hand to the XML parser
+                */
+               private function tidyThis($source)
+               {
+                       switch ( $this->tidy_mode )
+                       {
+                               case 'exec':
+                                       $tmp_file       = $this->tmp_dir.md5($source).'.txt';
+                                       if (file_put_contents($tmp_file, $source) === false){
+                                               // temp dir not writable: nothing to run tidy on
+                                               return $source;
+                                       }
+                                       $tidy           = array();
+                                       // the path is built from configuration; quote it for the shell
+                                       exec('tidy -utf8 -indent -asxhtml -numeric -bare -quiet ' . escapeshellarg($tmp_file), $tidy);
+                                       unlink($tmp_file);
+                                       return implode("\n", $tidy);
+
+                               case 'php':
+                                       // same options as the command line used in 'exec' mode
+                                       $tidy_config    = array(
+                                               'output-xhtml'          => true,
+                                               'numeric-entities'      => true,
+                                               'bare'                          => true,
+                                               'indent'                        => true
+                                       );
+                                       $tidy   = tidy_parse_string($source, $tidy_config, 'utf8');
+                                       // tidy_clean_repair() only reports success; the repaired markup
+                                       // has to be read back from the tidy object
+                                       tidy_clean_repair($tidy);
+                                       return tidy_get_output($tidy);
+
+                               default:
+                                       return $source;
+                       }
+
+               }
+
+
+               /**
+                * Loads the settings of a profile into this instance.
+                *
+                * The profile file is included with require_once because it may
+                * declare functions, so PHP runs it only once per process. The
+                * settings it produced are therefore cached and copied onto every
+                * later instance (or later call) that asks for the same profile.
+                *
+                * @param string $profile profile name; "<profile>.profile.php" is included
+                * @return void
+                */
+               private function loadProfile($profile)
+               {
+                       if (isset(self::$profile_cache[$profile])){
+                               foreach (self::$profile_cache[$profile] as $field => $value){
+                                       $this->$field   = $value;
+                               }
+                               // cached callbacks carry a placeholder instead of an object
+                               $this->callbacks        = $this->swapCallbackOwner($this->callbacks, false, $this);
+                               return;
+                       }
+
+                       $included_before        = count(get_included_files());
+
+                       require_once("$profile.profile.php");
+
+                       // only snapshot when the include really ran just now
+                       if (count(get_included_files()) > $included_before){
+                               $snapshot       = array();
+                               foreach (self::$profile_fields as $field){
+                                       $snapshot[$field]       = $this->$field;
+                               }
+                               // do not keep this instance alive through the static cache
+                               $snapshot['callbacks']  = $this->swapCallbackOwner($snapshot['callbacks'], $this, false);
+                               self::$profile_cache[$profile]  = $snapshot;
+                       }
+               }
+
+
+               /**
+                * Replaces the object part of array($object, 'method') callbacks.
+                *
+                * @param mixed $callbacks callback map as set by a profile
+                * @param mixed $from      owner to look for (compared with ===)
+                * @param mixed $to        owner to put in its place
+                * @return mixed the adjusted map; non-arrays are returned untouched
+                */
+               private function swapCallbackOwner($callbacks, $from, $to)
+               {
+                       if (!is_array($callbacks)) return $callbacks;
+
+                       foreach ($callbacks as $class => $callback){
+                               if (is_array($callback) && array_key_exists(0, $callback) && $callback[0] === $from){
+                                       $callback[0]            = $to;
+                                       $callbacks[$class]      = $callback;
+                               }
+                       }
+
+                       return $callbacks;
+               }
+
+
+               /**
+                * Parses markup and returns the root-class elements of the profile.
+                *
+                * Also remembers the document for include lookups and records the
+                * base URL from <base href> or an xml:base attribute.
+                *
+                * @param string       $input_xml well-formed XML / XHTML
+                * @param string|false $fragment  id of the element to restrict parsing
+                *                                to; when no element has that id the
+                *                                whole document is used
+                * @return array|false matching elements, or false when the markup
+                *                     cannot be parsed or the lookup fails
+                */
+               private function loadDoc($input_xml, $fragment=false)
+               {
+                       $xml            = simplexml_load_string($input_xml);
+
+                       if ($xml === false) return false;
+
+                       $this->doc      = $xml;
+                       $this->base     = '';   // do not inherit the base of a previous document
+
+                       if ($fragment){
+                               $doc    = $xml->xpath("//*[@id='$fragment']");
+                               if ($doc){
+                                       $fragment_xml   = simplexml_load_string($doc[0]->asXML());
+                                       if ($fragment_xml !== false) $xml = $fragment_xml;
+                               }
+                               $doc    = null;
+                       }
+
+                       // base tag
+                       if (isset($xml->head->base['href']) && (string) $xml->head->base['href'] != ''){
+                               $this->base = (string) $xml->head->base['href'];
+                       }
+
+                       // xml:base attribute - PITA with SimpleXML
+                       // ([^"]* rather than .* so the match stops at the closing quote)
+                       if (preg_match('/xml:base="([^"]*)"/', $xml->asXML(), $matches)){
+                               $this->base = $matches[1];
+                       }
+
+                       $roots  = $xml->xpath("//*[contains(concat(' ',normalize-space(@class),' '),' $this->root_class ')]");
+
+                       return is_array($roots) ? $roots : false;
+
+               }
+
+
+               /**
+                * Fetches a document, through the tidy proxy when tidy_mode is 'proxy'.
+                *
+                * @param string $url address of the document
+                * @return string|false body, or false when it cannot be fetched
+                */
+               private function loadURL($url)
+               {
+                       $this->url      = $url;
+
+                       if ($this->tidy_mode == 'proxy' && $this->tidy_proxy != ''){
+                               $url    = $this->tidy_proxy . $url;
+                       }
+
+                       // failure is reported through the false return value
+                       return @file_get_contents($url);
+
+               }
+
+
+               /**
+                * Normalises the raw parse result.
+                *
+                * A single record (recognised by the first required class being a
+                * key) is wrapped into a list, once-only classes are reduced to one
+                * value, the profile's hKit_<profile>_post() function is applied when
+                * it exists, and the helper 'text' entries are removed.
+                *
+                * @param string $profile profile name
+                * @param array  $s       result of processNodes()
+                * @return array
+                */
+               private function postProcess($profile, $s)
+               {
+                       $required       = $this->required;
+
+                       if (is_array($s) && is_array($required) && isset($required[0]) && array_key_exists($required[0], $s)){
+                               $s      = array($s);
+                       }
+
+                       $s      = $this->dedupeSingles($s);
+
+                       if (function_exists('hKit_'.$profile.'_post')){
+                               $s              = call_user_func('hKit_'.$profile.'_post', $s);
+                       }
+
+                       $s      = $this->removeTextVals($s);
+
+                       return $s;
+               }
+
+
+               /**
+                * Callback: turns a path found in the document into an absolute URL.
+                *
+                * The base is the document's base URL when it is absolute, otherwise
+                * the URL the document was fetched from. Without a usable base (for
+                * example when parsing a string) the path is returned unchanged.
+                *
+                * @param array $filepath preg match array; element 0 is the path
+                * @return string
+                */
+               private function resolvePath($filepath)
+               {
+                       $filepath       = $filepath[0];
+
+                       // already carries a scheme, or is an inline data: URI
+                       if (strpos($filepath, '://') !== false || strpos($filepath, 'data:') !== false){
+                               return $filepath;
+                       }
+
+                       $base   = (string) $this->base;
+                       $url    = $this->url;
+
+                       if ($base != '' &&  strpos($base, '://') !== false)
+                               $url    = $base;
+
+                       $r              = parse_url($url);
+
+                       if (!is_array($r) || !isset($r['scheme']) || !isset($r['host'])){
+                               // nothing to resolve against
+                               return $filepath;
+                       }
+
+                       $domain = $r['scheme'] . '://' . $r['host'];
+                       if (isset($r['port'])) $domain .= ':' . $r['port'];
+
+                       // protocol-relative reference: only the scheme is inherited
+                       if (substr($filepath, 0, 2) == '//'){
+                               return $r['scheme'] . ':' . $filepath;
+                       }
+
+                       if (!isset($r['path'])) $r['path'] = '/';
+                       $path   = explode('/', $r['path']);
+                       $file   = explode('/', $filepath);
+                       $new    = array('');
+
+                       if ($file[0] == ''){
+                               // absolute path
+                               return ''.$domain . implode('/', $file);
+                       }else{
+                               // relative path: drop a trailing empty segment, then a file name
+                               if (sizeof($path) > 0 && $path[sizeof($path)-1] == '') array_pop($path);
+                               if (sizeof($path) > 0 && strpos($path[sizeof($path)-1], '.') !== false) array_pop($path);
+
+                               foreach ($file as $segment){
+                                       if ($segment == '..'){
+                                               array_pop($path);
+                                       }else{
+                                               $new[]  = $segment;
+                                       }
+                               }
+                               return ''.$domain . implode('/', $path) . implode('/', $new);
+                       }
+               }
+
+               /**
+                * Callback: extracts the address part of a mailto: style value.
+                *
+                * @param array $v preg match array; element 0 is the raw value
+                * @return string path component of the value, '' when there is none
+                */
+               private function resolveEmail($v)
+               {
+                       $parts  = parse_url($v[0]);
+
+                       if (!is_array($parts) || !isset($parts['path'])) return '';
+
+                       return ($parts['path']);
+               }
+
+
+               /**
+                * Reduces once-only classes that were collected as a list to their
+                * first value.
+                *
+                * @param array $s list of records
+                * @return array
+                */
+               private function dedupeSingles($s)
+               {
+                       $singles        = $this->singles;
+
+                       if (!is_array($s) || !is_array($singles)) return $s;
+
+                       foreach ($s as $i => $item){
+                               // a record lacking the required class is not wrapped into a
+                               // list, so its scalar values show up here; skip them
+                               if (!is_array($item)) continue;
+
+                               foreach ($singles as $classname){
+                                       if (array_key_exists($classname, $item) && is_array($item[$classname])){
+                                               if (isset($item[$classname][0])) $s[$i][$classname]     = $item[$classname][0];
+                                       }
+                               }
+                       }
+
+                       return $s;
+               }
+
+               /**
+                * Recursively removes the helper 'text' entries and empty values.
+                *
+                * @param array $s
+                * @return array
+                */
+               private function removeTextVals($s)
+               {
+                       foreach ($s as $key => $val){
+                               if (is_array($val)){
+                                       $s[$key]        = $this->removeTextVals($val);
+                               }else if ($key === 'text'){
+                                       // strict comparison: integer keys are never the 'text' entry
+                                       $s[$key]        = '';
+                               }
+                       }
+
+                       return array_filter($s, array($this, 'isNonEmptyValue'));
+               }
+
+               /**
+                * array_filter() callback for removeTextVals(): drops '', null, false
+                * and empty arrays but keeps values such as '0'.
+                *
+                * @param mixed $v
+                * @return bool
+                */
+               private function isNonEmptyValue($v)
+               {
+                       return !($v === '' || $v === null || $v === false || $v === array());
+               }
+
+       }
+
+
+?>
\ No newline at end of file