<?php
/**
 * Importer for Delicious bookmark backup files
 *
 * Licensed under the GNU Affero General Public License, version 3
 * (see the @license tag below).
 *
 * @category  Bookmark
 * @package   StatusNet
 * @author    Evan Prodromou
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */

if (!defined('STATUSNET')) {
    // This check helps protect against security problems;
    // your code file can't be executed directly from the web.
    exit(1);
}

/**
 * Importer class for Delicious bookmarks
 *
 * Queue handler that parses a Delicious backup file and enqueues one
 * 'dlcsbkmk' item per public bookmark found in it.
 *
 * @category  Bookmark
 * @package   StatusNet
 * @author    Evan Prodromou
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
class DeliciousBackupImporter extends QueueHandler
{
    /**
     * Transport of the importer
     *
     * @return string transport string
     */
    function transport()
    {
        return 'dlcsback';
    }

    /**
     * Import an in-memory bookmark list to a user's account
     *
     * Take a delicious.com backup file (same as Netscape bookmarks.html)
     * and import to StatusNet as Bookmark activities.
     *
     * The document format is terrible. It consists of a <dl> with
     * a bunch of <dt>'s, occasionally with <dd>'s adding descriptions.
     * There are sometimes <p>'s lost inside.
     *
     * Failures are logged rather than propagated: a bad file or a bad
     * single bookmark never makes this method return anything but true.
     *
     * @param array $data pair of user, text
     *
     * @return boolean success value (always true)
     */
    function handle($data)
    {
        list($user, $body) = $data;

        try {
            $doc = $this->importHTML($body);
        } catch (ClientException $cex) {
            // XXX: message to the user
            common_log(LOG_WARNING, $cex->getMessage());
            return true;
        }

        // If we can't parse it, it's no good

        if (empty($doc)) {
            return true;
        }

        $dls = $doc->getElementsByTagName('dl');

        if ($dls->length != 1) {
            // XXX: message to the user
            common_log(LOG_WARNING, 'Bad input file');
            return true;
        }

        $children = $dls->item(0)->childNodes;

        // The most recent <dt> that has not been imported yet.
        $dt = null;

        for ($i = 0; $i < $children->length; $i++) {
            $child = $children->item($i);

            if ($child->nodeType != XML_ELEMENT_NODE) {
                continue;
            }

            try {
                switch (strtolower($child->tagName)) {
                case 'dt':
                    // <dt> nodes contain primary information about a bookmark.
                    // We can't import the current one just yet though, since
                    // it may be followed by a <dd>.
                    //
                    // Remember the new <dt> *before* importing the previous
                    // one: if that import throws (e.g. private bookmark),
                    // the current bookmark must not be lost with it.
                    $previous = $dt;
                    $dt       = $child;
                    if (!empty($previous)) {
                        // No DD provided
                        $this->importBookmark($user, $previous);
                    }
                    break;
                case 'dd':
                    // This <dd> contains a description for the bookmark in
                    // the preceding <dt> node. A <dd> without a pending <dt>
                    // is ignored.
                    $previous = $dt;
                    $dt       = null;
                    if (!empty($previous)) {
                        $this->importBookmark($user, $previous, $child);
                    }
                    break;
                case 'p':
                    common_log(LOG_INFO, 'Skipping the <p> in the <dl>.');
                    break;
                default:
                    common_log(LOG_WARNING,
                               "Unexpected element $child->tagName ".
                               " found in import.");
                }
            } catch (Exception $e) {
                // One broken or private bookmark must not abort the import;
                // the pending-<dt> state was already updated above.
                common_log(LOG_ERR, $e->getMessage());
            }
        }

        if (!empty($dt)) {
            // There was a final bookmark without a description.
            try {
                $this->importBookmark($user, $dt);
            } catch (Exception $e) {
                common_log(LOG_ERR, $e->getMessage());
            }
        }

        return true;
    }

    /**
     * Import a single bookmark
     *
     * Takes a <dt>/<dd> pair. The <dt> has a single <a>
     * in it with some non-standard attributes.
     *
     * The bookmark is not saved here; its data is enqueued on the
     * 'dlcsbkmk' transport.
     *
     * @param User       $user User to import data as
     * @param DOMElement $dt   <dt> element
     * @param DOMElement $dd   <dd> element, or null if there is no description
     *
     * @return void
     *
     * @throws ClientException if the <dt> has no link or the bookmark is private
     */
    function importBookmark($user, $dt, $dd = null)
    {
        $as = $dt->getElementsByTagName('a');

        if ($as->length == 0) {
            // TRANS: Client exception thrown when a bookmark in an import file is incorrectly formatted.
            throw new ClientException(_m("No <A> tag in a <DT>."));
        }

        $a = $as->item(0);

        // getAttribute() returns '' when the attribute is absent. A loose
        // comparison of '' with 0 is "not equal" on PHP 8, which would mark
        // every bookmark lacking the attribute as private; compare the
        // integer value instead.
        if (intval($a->getAttribute('private')) !== 0) {
            // TRANS: Client exception thrown when a bookmark in an import file is private.
            throw new ClientException(_m('Skipping private bookmark.'));
        }

        $description = empty($dd) ? null : $dd->nodeValue;

        $addDate = $a->getAttribute('add_date');

        $data = array(
            'profile_id'  => $user->id,
            'title'       => $a->nodeValue,
            'description' => $description,
            'url'         => $a->getAttribute('href'),
            'tags'        => $a->getAttribute('tags'),
            'created'     => common_sql_date(intval($addDate))
        );

        $qm = QueueManager::get();
        $qm->enqueue($data, 'dlcsbkmk');
    }

    /**
     * Parse some HTML
     *
     * Hides the errors that the dom parser returns, then repairs the
     * definition-list nesting of the parsed tree.
     *
     * @param string $body Data to import
     *
     * @return DOMDocument|null parsed document, or null if it could not be parsed
     */
    function importHTML($body)
    {
        // DOMDocument::loadHTML() rejects an empty source outright
        // (ValueError on PHP 8), so treat it as unparseable up front.
        if (!is_string($body) || $body === '') {
            return null;
        }

        // DOMDocument::loadHTML may complain about unrecognized elements
        // and namespaces; collect those inside libxml instead of letting
        // them surface as PHP warnings, then restore the previous mode.
        $previous = libxml_use_internal_errors(true);
        $dom      = new DOMDocument();
        $ok       = $dom->loadHTML($body);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$ok) {
            return null;
        }

        foreach ($dom->getElementsByTagName('body') as $node) {
            $this->fixListsIn($node);
        }

        return $dom;
    }

    /**
     * Repair every <dl> that is a direct child of the given node
     *
     * @param DOMNode $body node whose child lists should be fixed
     *
     * @return void
     */
    function fixListsIn(DOMNode $body)
    {
        // Collect first, fix afterwards: fixing moves nodes around and
        // must not happen while iterating the live child list.
        $toFix = array();

        foreach ($body->childNodes as $node) {
            if ($node->nodeType == XML_ELEMENT_NODE
                && strtolower($node->nodeName) == 'dl') {
                $toFix[] = $node;
            }
        }

        foreach ($toFix as $node) {
            $this->fixList($node);
        }
    }

    /**
     * Repair the <dt>/<dd> items of one list, recursing into sublists
     *
     * @param DOMNode $list <dl> element to fix
     *
     * @return void
     */
    function fixList(DOMNode $list)
    {
        $toFix = array();

        foreach ($list->childNodes as $node) {
            if ($node->nodeType == XML_ELEMENT_NODE) {
                $el = strtolower($node->nodeName);
                if ($el == 'dt' || $el == 'dd') {
                    $toFix[] = $node;
                }
                if ($el == 'dl') {
                    // Sublist.
                    // Technically, these can only appear inside a <dd>...
                    $this->fixList($node);
                }
            }
        }

        foreach ($toFix as $node) {
            $this->fixListItem($node);
        }
    }

    /**
     * Move wrongly nested <dt>/<dd> children up to be siblings of the item
     *
     * @param DOMNode $item <dt> or <dd> element to fix
     *
     * @return void
     */
    function fixListItem(DOMNode $item)
    {
        // The HTML parser in libxml2 doesn't seem to properly handle
        // many cases of implied close tags, apparently because it doesn't
        // understand the nesting rules specified in the HTML DTD.
        //
        // This leads to sequences of adjacent <dt>s or <dd>s being incorrectly
        // interpreted as parent->child trees instead of siblings:
        //
        // When parsing this input:  "<dt>aaa <dt>bbb"
        // should be equivalent to:  "<dt>aaa </dt><dt>bbb</dt>"
        // but we're seeing instead: "<dt>aaa <dt>bbb</dt></dt>"
        //
        // It does at least know that going from dt to dd, or dd to dt,
        // should make a break.

        $toMove = array();

        foreach ($item->childNodes as $node) {
            if ($node->nodeType == XML_ELEMENT_NODE) {
                $el = strtolower($node->nodeName);
                if ($el == 'dt' || $el == 'dd') {
                    // dt & dd cannot contain each other;
                    // This node was incorrectly placed; move it up a level!
                    $toMove[] = $node;
                }
                if ($el == 'dl') {
                    // Sublist.
                    // Technically, these can only appear inside a <dd>.
                    $this->fixList($node);
                }
            }
        }

        // Re-insert the misplaced nodes right after $item, in their
        // original order, and repair each of them in turn.
        $parent = $item->parentNode;
        $next   = $item->nextSibling;
        foreach ($toMove as $node) {
            $item->removeChild($node);
            $parent->insertBefore($node, $next);
            $this->fixListItem($node);
        }
    }
}