2 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
5 * Key gateway class for XML_Feed_Parser package
9 * LICENSE: This source file is subject to version 3.0 of the PHP license
10 * that is available through the world-wide-web at the following URI:
11 * http://www.php.net/license/3_0.txt. If you did not receive a copy of
12 * the PHP License and are unable to obtain it through the web, please
13 * send a note to license@php.net so we can mail you a copy immediately.
16 * @package XML_Feed_Parser
17 * @author James Stewart <james@jystewart.net>
18 * @copyright 2005 James Stewart <james@jystewart.net>
19 * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL
20 * @version CVS: $Id: Parser.php,v 1.24 2006/08/15 13:04:00 jystewart Exp $
21 * @link http://pear.php.net/package/XML_Feed_Parser/
25 * XML_Feed_Parser_Type is an abstract class required by all of our
26 * feed types. It makes sense to load it here to keep the other files
29 require_once 'XML/Feed/Parser/Type.php';
32 * We will throw exceptions when errors occur.
34 require_once 'XML/Feed/Parser/Exception.php';
37 * This is the core of the XML_Feed_Parser package. It identifies feed types
38 * and abstracts access to them. It is an iterator, allowing for easy access
41 * @author James Stewart <james@jystewart.net>
42 * @version Release: 1.0.3
43 * @package XML_Feed_Parser
45 class XML_Feed_Parser implements Iterator
48 * This is where we hold the feed object
54 * To allow for extensions, we make a public reference to the feed model
60 * A map between entry ID and offset
63 protected $idMappings = array();
66 * A storage space for Namespace URIs.
69 private $feedNamespaces = array(
71 'http://backend.userland.com/rss',
72 'http://backend.userland.com/rss2',
73 'http://blogs.law.harvard.edu/tech/rss'));
75 * Detects feed types and instantiates appropriate objects.
77 * Our constructor takes care of detecting feed types and instantiating
78 * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0
79 * but raise a warning. I do not intend to introduce full support for
80 * Atom 0.3 as it has been deprecated, but others are welcome to.
82 * @param string $feed XML serialization of the feed
83 * @param bool $strict Whether or not to validate the feed
84 * @param bool $suppressWarnings Trigger errors for deprecated feed types?
85 * @param bool $tidy Whether or not to try and use the tidy library on input
87 function __construct($feed, $strict = false, $suppressWarnings = false, $tidy = false)
/* Parses $feed into a DOM document held on $this->model, chooses a parser
   class from the document's root element, and stores an instance of that
   class on $this->feed. */
89 $this->model = new DOMDocument;
/* A false return from loadXML() means $feed could not be parsed as XML. */
90 if (! $this->model->loadXML($feed)) {
/* Retry through tidy only when the extension is loaded and $tidy is truthy. */
91 if (extension_loaded('tidy') && $tidy) {
/* Ask tidy to treat both its input and its output as XML. */
93 $tidy->parseString($feed,
94 array('input-xml' => true, 'output-xml' => true));
/* Second parse attempt, this time on tidy's string output; failure raises. */
96 if (! $this->model->loadXML((string) $tidy)) {
97 throw new XML_Feed_Parser_Exception('Invalid input: this is not ' .
101 throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
106 /* detect feed type */
/* Every case below tests the root element (namespace URI, tag name or
   version attribute), loads the matching parser files and names the class
   to instantiate in $class. */
107 $doc_element = $this->model->documentElement;
/* Atom 1.0 namespace on the root element. */
111 case ($doc_element->namespaceURI == 'http://www.w3.org/2005/Atom'):
112 require_once 'XML/Feed/Parser/Atom.php';
113 require_once 'XML/Feed/Parser/AtomElement.php';
114 $class = 'XML_Feed_Parser_Atom';
/* Atom 0.3 namespace: same parser class as Atom 1.0, plus a message in $error. */
116 case ($doc_element->namespaceURI == 'http://purl.org/atom/ns#'):
117 require_once 'XML/Feed/Parser/Atom.php';
118 require_once 'XML/Feed/Parser/AtomElement.php';
119 $class = 'XML_Feed_Parser_Atom';
120 $error = 'Atom 0.3 deprecated, using 1.0 parser which won\'t provide ' .
/* RSS 1.0: namespace found on the root element or on its child node at index 1.
   NOTE(review): only index 1 is inspected -- presumably index 0 is expected to
   be a whitespace text node; confirm against compactly serialized feeds. */
123 case ($doc_element->namespaceURI == 'http://purl.org/rss/1.0/' ||
124 ($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1
125 && $doc_element->childNodes->item(1)->namespaceURI ==
126 'http://purl.org/rss/1.0/')):
127 require_once 'XML/Feed/Parser/RSS1.php';
128 require_once 'XML/Feed/Parser/RSS1Element.php';
129 $class = 'XML_Feed_Parser_RSS1';
/* RSS 1.1: same root-or-child-at-index-1 test with the 1.1 namespace. */
131 case ($doc_element->namespaceURI == 'http://purl.org/rss/1.1/' ||
132 ($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1
133 && $doc_element->childNodes->item(1)->namespaceURI ==
134 'http://purl.org/rss/1.1/')):
135 require_once 'XML/Feed/Parser/RSS11.php';
136 require_once 'XML/Feed/Parser/RSS11Element.php';
137 $class = 'XML_Feed_Parser_RSS11';
/* RSS 0.9: same test with the Netscape 0.9 namespace (child checked first here). */
139 case (($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1
140 && $doc_element->childNodes->item(1)->namespaceURI ==
141 'http://my.netscape.com/rdf/simple/0.9/') ||
142 $doc_element->namespaceURI == 'http://my.netscape.com/rdf/simple/0.9/'):
143 require_once 'XML/Feed/Parser/RSS09.php';
144 require_once 'XML/Feed/Parser/RSS09Element.php';
145 $class = 'XML_Feed_Parser_RSS09';
/* <rss> root whose version attribute loosely equals 0.91: parsed with the RSS2
   classes, with a message stored in $error.
   NOTE(review): the attribute value is a string compared with == to a float. */
147 case ($doc_element->tagName == 'rss' and
148 $doc_element->hasAttribute('version') &&
149 $doc_element->getAttribute('version') == 0.91):
150 $error = 'RSS 0.91 has been superceded by RSS2.0. Using RSS2.0 parser.';
151 require_once 'XML/Feed/Parser/RSS2.php';
152 require_once 'XML/Feed/Parser/RSS2Element.php';
153 $class = 'XML_Feed_Parser_RSS2';
/* <rss> root whose version attribute loosely equals 0.92: handled the same way. */
155 case ($doc_element->tagName == 'rss' and
156 $doc_element->hasAttribute('version') &&
157 $doc_element->getAttribute('version') == 0.92):
158 $error = 'RSS 0.92 has been superceded by RSS2.0. Using RSS2.0 parser.';
159 require_once 'XML/Feed/Parser/RSS2.php';
160 require_once 'XML/Feed/Parser/RSS2Element.php';
161 $class = 'XML_Feed_Parser_RSS2';
/* Any other <rss> root, or a root whose namespace is listed under
   $this->feedNamespaces['rss2']. */
163 case (in_array($doc_element->namespaceURI, $this->feedNamespaces['rss2'])
164 || $doc_element->tagName == 'rss'):
/* A missing version attribute, or one not loosely equal to 2, sets the message below. */
165 if (! $doc_element->hasAttribute('version') ||
166 $doc_element->getAttribute('version') != 2) {
167 $error = 'RSS version not specified. Parsing as RSS2.0';
169 require_once 'XML/Feed/Parser/RSS2.php';
170 require_once 'XML/Feed/Parser/RSS2Element.php';
171 $class = 'XML_Feed_Parser_RSS2';
/* Raised when the feed type is not recognised. */
174 throw new XML_Feed_Parser_Exception('Feed type unknown');
/* Report any message collected in $error as an E_USER_WARNING, unless the
   caller passed a truthy $suppressWarnings. */
178 if (! $suppressWarnings && ! empty($error)) {
179 trigger_error($error, E_USER_WARNING);
182 /* Instantiate feed object */
/* $class is the parser class name chosen above; its constructor receives the
   DOM document and the $strict flag. */
183 $this->feed = new $class($this->model, $strict);
187 * Proxy to allow feed element names to be used as method names
189 * For top-level feed elements we will provide access using methods or
190 * attributes. This function simply passes on a request to the appropriate
193 * @param string $call - the method being called
194 * @param array $attributes
function __call($call, $attributes)
{
    /*
     * Forward the call to the method of the same name on the feed object.
     * The feed always receives exactly five positional arguments: the
     * caller's list is padded with false up to five entries, and anything
     * beyond the fifth entry is not passed on.
     */
    $args = array_pad($attributes, 5, false);

    return $this->feed->$call($args[0], $args[1], $args[2], $args[3], $args[4]);
}
204 * Proxy to allow feed element names to be used as attribute names
206 * To allow variable-like access to feed-level data we use this
207 * method. It simply passes along to __call() which in turn passes
208 * along to the relevant object.
210 * @param string $val - the name of the variable required
214 return $this->feed->$val;
218 * Provides iteration functionality.
220 * Of course we must be able to iterate... This function simply increases
221 * our internal counter.
225 if (isset($this->current_item) &&
226 $this->current_item <= $this->feed->numberEntries - 1) {
227 ++$this->current_item;
228 } else if (! isset($this->current_item)) {
229 $this->current_item = 0;
236 * Return XML_Feed_Parser_Type object for current element
238 * @return XML_Feed_Parser_Type Object
242 return $this->getEntryByOffset($this->current_item);
246 * For iteration -- returns the key for the current stage in the array.
252 return $this->current_item;
256 * For iteration -- tells whether we have reached the
263 return $this->current_item < $this->feed->numberEntries;
267 * For iteration -- resets the internal counter to the beginning.
271 $this->current_item = 0;
275 * Provides access to entries by ID if one is specified in the source feed.
277 * As well as allowing the items to be iterated over we want to allow
278 * users to be able to access a specific entry. This is one of two ways of
279 * doing that, the other being by offset. This method can be quite slow
280 * if dealing with a large feed that hasn't yet been processed as it
281 * instantiates objects for every entry until it finds the one needed.
283 * @param string $id Valid ID for the given feed format
284 * @return XML_Feed_Parser_Type|false
function getEntryById($id)
{
    /*
     * $this->idMappings maps entry IDs to offsets. An ID missing from that
     * map is handed to the feed object, which performs the lookup itself.
     */
    if (! isset($this->idMappings[$id])) {
        return $this->feed->getEntryById($id);
    }

    /* Known ID: resolve it through its recorded offset. */
    $offset = $this->idMappings[$id];

    return $this->getEntryByOffset($offset);
}
301 * Retrieve entry by numeric offset, starting from zero.
303 * As well as allowing the items to be iterated over we want to allow
304 * users to be able to access a specific entry. This is one of two ways of
305 * doing that, the other being by ID.
307 * @param int $offset The position of the entry within the feed, starting from 0
308 * @return XML_Feed_Parser_Type|false
310 function getEntryByOffset($offset)
/* Returns the entry stored at $offset on the feed object, asking the feed to
   load it first when it is not there yet.
   NOTE(review): only the upper bound is tested below, so a negative $offset
   passes this check. */
312 if ($offset < $this->feed->numberEntries) {
/* Entry object already present on the feed: return it as is. */
313 if (isset($this->feed->entries[$offset])) {
314 return $this->feed->entries[$offset];
/* Otherwise ask the feed for the entry. Presumably this call fills
   $this->feed->entries[$offset], which is read below -- confirm in the feed
   type class. */
317 $this->feed->getEntryByOffset($offset);
318 } catch (Exception $e) {
/* Record the entry's ID against its offset; getEntryById() consults
   $this->idMappings before falling back to the feed object. */
321 $id = $this->feed->entries[$offset]->getID();
322 $this->idMappings[$id] = $offset;
323 return $this->feed->entries[$offset];
331 * Retrieve version details from feed type class.
334 * @author James Stewart
338 return $this->feed->version;
342 * Returns a string representation of the feed.
function __toString()
{
    /* The feed object produces the string form; hand it back unchanged. */
    $rendered = $this->feed->__toString();

    return $rendered;
}