3 * StatusNet - the distributed open-source microblogging tool
4 * Copyright (C) 2010, StatusNet, Inc.
6 * Importer class for Delicious.com backups
10 * This program is free software: you can redistribute it and/or modify
11 * it under the terms of the GNU Affero General Public License as published by
12 * the Free Software Foundation, either version 3 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Affero General Public License for more details.
20 * You should have received a copy of the GNU Affero General Public License
21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
25 * @author Evan Prodromou <evan@status.net>
26 * @copyright 2010 StatusNet, Inc.
27 * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
28 * @link http://status.net/
31 if (!defined('STATUSNET')) {
32 // This check helps protect against security problems;
33 // your code file can't be executed directly from the web.
37 require_once INSTALLDIR . '/lib/apiauth.php';
40 * Importer class for Delicious bookmarks
44 * @author Evan Prodromou <evan@status.net>
45 * @copyright 2010 StatusNet, Inc.
46 * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
47 * @link http://status.net/
50 class DeliciousBackupImporter
53 * Import an in-memory bookmark list to a user's account
55 * Take a delicious.com backup file (same as Netscape bookmarks.html)
56 * and import to StatusNet as Bookmark activities.
58 * The document format is terrible. It consists of a <dl> with
59 * a bunch of <dt>'s, occasionally with <dd>'s.
60 * There are sometimes <p>'s lost inside.
62 * @param User $user User whose feed we're going to fill
63 * @param string $body Body of the file
68 function importBookmarks($user, $body)
// NOTE(review): this listing has gaps (the embedded line numbers skip), so
// braces, case labels and some assignments are not visible here. Comments
// below describe only what the visible lines show; guesses are marked.

// Parse the raw backup text into a DOM document via importHTML().
// NOTE(review): no null check on $doc is visible before it is used;
// confirm importHTML() cannot hand back a non-document value.
70 $doc = $this->importHTML($body);
72 $dls = $doc->getElementsByTagName('dl');
// Exactly one <dl> is expected in the document; any other count is
// rejected as a bad import file.
74 if ($dls->length != 1) {
75 throw new ClientException(_("Bad import file."));
// $dl is assigned on a line not shown here -- presumably the single <dl>
// found above; TODO confirm.
80 $children = $dl->childNodes;
// Walk the direct children of the <dl> in document order.
84 for ($i = 0; $i < $children->length; $i++) {
86 $child = $children->item($i);
// Non-element nodes take a branch whose body is not shown here
// (presumably they are skipped; TODO confirm).
87 if ($child->nodeType != XML_ELEMENT_NODE) {
// Every element's tag name is logged at INFO level before dispatch.
90 common_log(LOG_INFO, $child->tagName);
// Dispatch on the lower-cased tag name, so matching is case-insensitive.
// The case labels themselves are not visible in this listing.
91 switch (strtolower($child->tagName)) {
// Import $dt on its own: the third argument is omitted, so $dd takes its
// default of null and the bookmark gets no <dd> element.
95 $this->importBookmark($user, $dt);
// Import $dt together with $dd as its <dd> element. How $dt and $dd are
// tracked between iterations is on lines not shown here.
103 $saved = $this->importBookmark($user, $dt, $dd);
// <p> elements found inside the <dl> are only logged (presumably under a
// 'p' case label; TODO confirm).
108 common_log(LOG_INFO, 'Skipping the <p> in the <dl>.');
// Any other element is reported as a warning (presumably the default
// case; TODO confirm).
111 common_log(LOG_WARNING,
112 "Unexpected element $child->tagName ".
113 " found in import.");
// Exceptions are logged at LOG_ERR. No rethrow is visible here, so
// presumably a single bad entry does not abort the whole import, but the
// position of the matching try is not shown; TODO confirm.
115 } catch (Exception $e) {
116 common_log(LOG_ERR, $e->getMessage());
123 * Import a single bookmark
125 * Takes a <dt>/<dd> pair. The <dt> has a single
126 * <a> in it with some non-standard attributes.
128 * A <dt><dt><dd> sequence will appear as a <dt> with
129 * another <dt> as a child. We handle this case recursively.
131 * @param User $user User to import data as
132 * @param DOMElement $dt <dt> element
133 * @param DOMElement $dd <dd> element
135 * @return Notice imported notice
138 function importBookmark($user, $dt, $dd = null)
// NOTE(review): this listing has gaps (the embedded line numbers skip), so
// braces, some assignments and some conditions are not visible here.
// Comments below describe only what the visible lines show; guesses are
// marked.
140 // We have to go squirrelling around in the child nodes
141 // on the off chance that we've received another <dt>
144 for ($i = 0; $i < $dt->childNodes->length; $i++) {
145 $child = $dt->childNodes->item($i);
146 if ($child->nodeType == XML_ELEMENT_NODE) {
// A nested <dt> is only split out when a <dd> was supplied. The outer
// <dt> is then re-imported WITHOUT a <dd> (so this branch cannot be
// re-entered for it and the recursion terminates), and the nested <dt>
// is imported with the <dd> instead.
// Note the tag comparison here is case-sensitive ('dt' only).
// Whatever follows the two calls (e.g. an early return) is not visible
// in this listing.
147 if ($child->tagName == 'dt' && !is_null($dd)) {
148 $this->importBookmark($user, $dt);
149 $this->importBookmark($user, $child, $dd);
// Collect the <a> elements under the <dt>; at least one is required.
155 $as = $dt->getElementsByTagName('a');
157 if ($as->length == 0) {
158 throw new ClientException(_("No <A> tag in a <DT>."));
// $a is assigned on a line not shown here -- presumably the first <a>
// in $as; TODO confirm.
163 $private = $a->getAttribute('private');
// Thrown from a condition that is not visible here -- presumably when the
// 'private' attribute is set; TODO confirm.
166 throw new ClientException(_('Skipping private bookmark.'));
// The description is the text content of the <dd>. $dd may be null (its
// default), so this is presumably guarded by a null check on a line not
// shown; TODO confirm.
170 $description = $dd->nodeValue;
// Title is the link text; URL, tags and the add date come from
// attributes on the <a>.
175 $title = $a->nodeValue;
176 $url = $a->getAttribute('href');
177 $tags = $a->getAttribute('tags');
178 $addDate = $a->getAttribute('add_date');
// add_date goes through intval(), so a missing or non-numeric attribute
// becomes 0 before being formatted by common_sql_date().
// NOTE(review): presumably add_date is a Unix timestamp -- confirm.
179 $created = common_sql_date(intval($addDate));
// Only the first and last arguments of this call are visible in this
// listing; the creation time is passed in an options array under the
// 'created' key.
181 $saved = Notice_bookmark::saveNew($user,
186 array('created' => $created));
194 * Hides the errors that the dom parser returns
196 * @param string $body Data to import
198 * @return DOMDocument parsed document
201 function importHTML($body)
203 // DOMDocument::loadHTML may throw warnings on unrecognized elements,
204 // and notices on unrecognized namespaces.
205 $old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE));
206 $dom = new DOMDocument();
207 $ok = $dom->loadHTML($body);
208 error_reporting($old);