plugins/FeedSub/extlib/XML/Feed/Parser/Type.php

   1 <?php
   2 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
   3
   4 /**
   5  * Abstract class providing common methods for XML_Feed_Parser feeds.
   6  *
   7  * PHP versions 5
   8  *
   9  * LICENSE: This source file is subject to version 3.0 of the PHP license
  10  * that is available through the world-wide-web at the following URI:
  11  * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
  12  * the PHP License and are unable to obtain it through the web, please
  13  * send a note to license@php.net so we can mail you a copy immediately.
  14  *
  15  * @category   XML
  16  * @package    XML_Feed_Parser
  17  * @author     James Stewart <james@jystewart.net>
  18  * @copyright  2005 James Stewart <james@jystewart.net>
  19  * @license    http://www.gnu.org/copyleft/lesser.html  GNU LGPL 2.1
  20  * @version    CVS: $Id: Type.php,v 1.25 2008/03/08 18:39:09 jystewart Exp $
  21  * @link       http://pear.php.net/package/XML_Feed_Parser/
  22  */
  23
  24 /**
  25  * This abstract class provides some general methods that are likely to be
  26  * implemented exactly the same way for all feed types.
  27  *
  28  * @package XML_Feed_Parser
  29  * @author  James Stewart <james@jystewart.net>
  30  * @version Release: 1.0.3
  31  */
  32 abstract class XML_Feed_Parser_Type
  33 {
  34     /**
  35      * Where we store our DOM object for this feed
  36      * @var DOMDocument
  37      */
  38     public $model;
  39
  40     /**
  41      * For iteration we'll want a count of the number of entries
  42      * @var int
  43      */
  44     public $numberEntries;
  45
  46     /**
  47      * Where we store our entry objects once instantiated
  48      * @var array
  49      */
  50     public $entries = array();
  51
  52     /**
  53      * Store mappings between entry IDs and their position in the feed
  54      */
  55     public $idMappings = array();
  56
  57     /**
  58      * Proxy to allow use of element names as method names
  59      *
  60      * We are not going to provide methods for every entry type so this
  61      * function will allow for a lot of mapping. We rely pretty heavily
  62      * on this to handle our mappings between other feed types and atom.
  63      *
  64      * @param   string  $call - the method attempted
  65      * @param   array   $arguments - arguments to that method
  66      * @return  mixed
  67      */
  68     function __call($call, $arguments = array())
  69     {
  70         if (! is_array($arguments)) {
  71             $arguments = array();
  72         }
  73
  74         if (isset($this->compatMap[$call])) {
  75             $tempMap = $this->compatMap;
  76             $tempcall = array_pop($tempMap[$call]);
  77             if (! empty($tempMap)) {
  78                 $arguments = array_merge($arguments, $tempMap[$call]);
  79             }
  80             $call = $tempcall;
  81         }
  82
  83         /* To be helpful, we allow a case-insensitive search for this method */
  84         if (! isset($this->map[$call])) {
  85             foreach (array_keys($this->map) as $key) {
  86                 if (strtoupper($key) == strtoupper($call)) {
  87                     $call = $key;
  88                     break;
  89                 }
  90             }
  91         }
  92
  93         if (empty($this->map[$call])) {
  94             return false;
  95         }
  96
  97         $method = 'get' . $this->map[$call][0];
  98         if ($method == 'getLink') {
  99             $offset = empty($arguments[0]) ? 0 : $arguments[0];
 100             $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
 101             $params = isset($arguments[2]) ? $arguments[2] : array();
 102             return $this->getLink($offset, $attribute, $params);
 103         }
 104         if (method_exists($this, $method)) {
 105             return $this->$method($call, $arguments);
 106         }
 107
 108         return false;
 109     }
 110
 111     /**
 112      * Proxy to allow use of element names as attribute names
 113      *
 114      * For many elements variable-style access will be desirable. This function
 115      * provides for that.
 116      *
 117      * @param   string  $value - the variable required
 118      * @return  mixed
 119      */
 120     function __get($value)
 121     {
 122         return $this->__call($value, array());
 123     }
 124
 125     /**
 126      * Utility function to help us resolve xml:base values
 127      *
 128      * We have other methods which will traverse the DOM and work out the different
 129      * xml:base declarations we need to be aware of. We then need to combine them.
 130      * If a declaration starts with a protocol then we restart the string. If it
 131      * starts with a / then we add on to the domain name. Otherwise we simply tag
 132      * it on to the end.
 133      *
 134      * @param   string  $base - the base to add the link to
 135      * @param   string  $link
 136      */
 137     function combineBases($base, $link)
 138     {
 139         if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
 140             return $link;
 141         } else if (preg_match('/^\//', $link)) {
 142             /* Extract domain and suffix link to that */
 143             preg_match('/^([A-Za-z]+:\/\/.*)?\/*/', $base, $results);
 144             $firstLayer = $results[0];
 145             return $firstLayer . "/" . $link;
 146         } else if (preg_match('/^\.\.\//', $base)) {
 147             /* Step up link to find place to be */
 148             preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases);
 149             $suffix = $bases[3];
 150             $count = preg_match_all('/\.\.\//', $bases[1], $steps);
 151             $url = explode("/", $base);
 152             for ($i = 0; $i <= $count; $i++) {
 153                 array_pop($url);
 154             }
 155             return implode("/", $url) . "/" . $suffix;
 156         } else if (preg_match('/^(?!\/$)/', $base)) {
 157             $base = preg_replace('/(.*\/).*$/', '$1', $base)  ;
 158             return $base . $link;
 159         } else {
 160             /* Just stick it on the end */
 161             return $base . $link;
 162         }
 163     }
 164
 165     /**
 166      * Determine whether we need to apply our xml:base rules
 167      *
 168      * Gets us the xml:base data and then processes that with regard
 169      * to our current link.
 170      *
 171      * @param   string
 172      * @param   DOMElement
 173      * @return  string
 174      */
 175     function addBase($link, $element)
 176     {
 177         if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
 178             return $link;
 179         }
 180
 181         return $this->combineBases($element->baseURI, $link);
 182     }
 183
 184     /**
 185      * Get an entry by its position in the feed, starting from zero
 186      *
 187      * As well as allowing the items to be iterated over we want to allow
 188      * users to be able to access a specific entry. This is one of two ways of
 189      * doing that, the other being by ID.
 190      *
 191      * @param   int $offset
 192      * @return  XML_Feed_Parser_RSS1Element
 193      */
 194     function getEntryByOffset($offset)
 195     {
 196         if (! isset($this->entries[$offset])) {
 197             $entries = $this->model->getElementsByTagName($this->itemElement);
 198             if ($entries->length > $offset) {
 199                 $xmlBase = $entries->item($offset)->baseURI;
 200                 $this->entries[$offset] = new $this->itemClass(
 201                     $entries->item($offset), $this, $xmlBase);
 202                 if ($id = $this->entries[$offset]->id) {
 203                     $this->idMappings[$id] = $this->entries[$offset];
 204                 }
 205             } else {
 206                 throw new XML_Feed_Parser_Exception('No entries found');
 207             }
 208         }
 209
 210         return $this->entries[$offset];
 211     }
 212
 213     /**
 214      * Return a date in seconds since epoch.
 215      *
 216      * Get a date construct. We use PHP's strtotime to return it as a unix datetime, which
 217      * is the number of seconds since 1970-01-01 00:00:00.
 218      *
 219      * @link    http://php.net/strtotime
 220      * @param    string    $method        The name of the date construct we want
 221      * @param    array     $arguments    Included for compatibility with our __call usage
 222      * @return    int|false datetime
 223      */
 224     protected function getDate($method, $arguments)
 225     {
 226         $time = $this->model->getElementsByTagName($method);
 227         if ($time->length == 0 || empty($time->item(0)->nodeValue)) {
 228             return false;
 229         }
 230         return strtotime($time->item(0)->nodeValue);
 231     }
 232
 233     /**
 234      * Get a text construct.
 235      *
 236      * @param    string    $method    The name of the text construct we want
 237      * @param    array     $arguments    Included for compatibility with our __call usage
 238      * @return    string
 239      */
 240     protected function getText($method, $arguments = array())
 241     {
 242         $tags = $this->model->getElementsByTagName($method);
 243         if ($tags->length > 0) {
 244             $value = $tags->item(0)->nodeValue;
 245             return $value;
 246         }
 247         return false;
 248     }
 249
 250     /**
 251      * Apply various rules to retrieve category data.
 252      *
 253      * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
 254      * and  Atom. Instead the usual approach is to use the dublin core namespace to
 255      * declare  categories. For example delicious use both:
 256      * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
 257      * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
 258      * to declare a categorisation of 'PEAR'.
 259      *
 260      * We need to be sensitive to this where possible.
 261      *
 262      * @param    string    $call    for compatibility with our overloading
 263      * @param   array $arguments - arg 0 is the offset, arg 1 is whether to return as array
 264      * @return  string|array|false
 265      */
 266     protected function getCategory($call, $arguments)
 267     {
 268         $categories = $this->model->getElementsByTagName('subject');
 269         $offset = empty($arguments[0]) ? 0 : $arguments[0];
 270         $array = empty($arguments[1]) ? false : true;
 271         if ($categories->length <= $offset) {
 272             return false;
 273         }
 274         if ($array) {
 275             $list = array();
 276             foreach ($categories as $category) {
 277                 array_push($list, $category->nodeValue);
 278             }
 279             return $list;
 280         }
 281         return $categories->item($offset)->nodeValue;
 282     }
 283
 284     /**
 285      * Count occurrences of an element
 286      *
 287      * This function will tell us how many times the element $type
 288      * appears at this level of the feed.
 289      *
 290      * @param    string    $type    the element we want to get a count of
 291      * @return    int
 292      */
 293     protected function count($type)
 294     {
 295         if ($tags = $this->model->getElementsByTagName($type)) {
 296             return $tags->length;
 297         }
 298         return 0;
 299     }
 300
 301     /**
 302      * Part of our xml:base processing code
 303      *
 304      * We need a couple of methods to access XHTML content stored in feeds.
 305      * This is because we dereference all xml:base references before returning
 306      * the element. This method handles the attributes.
 307      *
 308      * @param   DOMElement $node    The DOM node we are iterating over
 309      * @return  string
 310      */
 311     function processXHTMLAttributes($node) {
 312         $return = '';
 313         foreach ($node->attributes as $attribute) {
 314             if ($attribute->name == 'src' or $attribute->name == 'href') {
 315                 $attribute->value = $this->addBase(htmlentities($attribute->value, NULL, 'utf-8'), $attribute);
 316             }
 317             if ($attribute->name == 'base') {
 318                 continue;
 319             }
 320             $return .= $attribute->name . '="' . htmlentities($attribute->value, NULL, 'utf-8') .'" ';
 321         }
 322         if (! empty($return)) {
 323             return ' ' . trim($return);
 324         }
 325         return '';
 326     }
 327
 328     /**
 329      * Convert HTML entities based on the current character set.
 330      *
 331      * @param String
 332      * @return String
 333      */
 334     function processEntitiesForNodeValue($node)
 335     {
 336         if (function_exists('iconv')) {
 337           $current_encoding = $node->ownerDocument->encoding;
 338           $value = iconv($current_encoding, 'UTF-8', $node->nodeValue);
 339         } else if ($current_encoding == 'iso-8859-1') {
 340           $value = utf8_encode($node->nodeValue);
 341         } else {
 342           $value = $node->nodeValue;
 343         }
 344
 345         $decoded = html_entity_decode($value, NULL, 'UTF-8');
 346         return htmlentities($decoded, NULL, 'UTF-8');
 347     }
 348
 349     /**
 350      * Part of our xml:base processing code
 351      *
 352      * We need a couple of methods to access XHTML content stored in feeds.
 353      * This is because we dereference all xml:base references before returning
 354      * the element. This method recurs through the tree descending from the node
 355      * and builds our string.
 356      *
 357      * @param   DOMElement $node    The DOM node we are processing
 358      * @return   string
 359      */
 360     function traverseNode($node)
 361     {
 362         $content = '';
 363
 364         /* Add the opening of this node to the content */
 365         if ($node instanceof DOMElement) {
 366             $content .= '<' . $node->tagName .
 367                 $this->processXHTMLAttributes($node) . '>';
 368         }
 369
 370         /* Process children */
 371         if ($node->hasChildNodes()) {
 372             foreach ($node->childNodes as $child) {
 373                 $content .= $this->traverseNode($child);
 374             }
 375         }
 376
 377         if ($node instanceof DOMText) {
 378             $content .= $this->processEntitiesForNodeValue($node);
 379         }
 380
 381         /* Add the closing of this node to the content */
 382         if ($node instanceof DOMElement) {
 383             $content .= '</' . $node->tagName . '>';
 384         }
 385
 386         return $content;
 387     }
 388
 389     /**
 390      * Get content from RSS feeds (atom has its own implementation)
 391      *
 392      * The official way to include full content in an RSS1 entry is to use
 393      * the content module's element 'encoded', and RSS2 feeds often duplicate that.
 394      * Often, however, the 'description' element is used instead. We will offer that
 395      * as a fallback. Atom uses its own approach and overrides this method.
 396      *
 397      * @return  string|false
 398      */
 399     protected function getContent()
 400     {
 401         $options = array('encoded', 'description');
 402         foreach ($options as $element) {
 403             $test = $this->model->getElementsByTagName($element);
 404             if ($test->length == 0) {
 405                 continue;
 406             }
 407             if ($test->item(0)->hasChildNodes()) {
 408                 $value = '';
 409                 foreach ($test->item(0)->childNodes as $child) {
 410                     if ($child instanceof DOMText) {
 411                         $value .= $child->nodeValue;
 412                     } else {
 413                         $simple = simplexml_import_dom($child);
 414                         $value .= $simple->asXML();
 415                     }
 416                 }
 417                 return $value;
 418             } else if ($test->length > 0) {
 419                 return $test->item(0)->nodeValue;
 420             }
 421         }
 422         return false;
 423     }
 424
 425     /**
 426      * Checks if this element has a particular child element.
 427      *
 428      * @param   String
 429      * @param   Integer
 430      * @return  bool
 431      **/
 432     function hasKey($name, $offset = 0)
 433     {
 434         $search = $this->model->getElementsByTagName($name);
 435         return $search->length > $offset;
 436     }
 437
 438     /**
 439      * Return an XML serialization of the feed, should it be required. Most
 440      * users however, will already have a serialization that they used when
 441      * instantiating the object.
 442      *
 443      * @return    string    XML serialization of element
 444      */
 445     function __toString()
 446     {
 447         $simple = simplexml_import_dom($this->model);
 448         return $simple->asXML();
 449     }
 450
 451     /**
 452      * Get directory holding RNG schemas. Method is based on that
 453      * found in Contact_AddressBook.
 454      *
 455      * @return string PEAR data directory.
 456      * @access public
 457      * @static
 458      */
 459     static function getSchemaDir()
 460     {
 461         require_once 'PEAR/Config.php';
 462         $config = new PEAR_Config;
 463         return $config->get('data_dir') . '/XML_Feed_Parser/schemas';
 464     }
 465 }
 466
 467 ?>