X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=plugins%2FBookmark%2Fdeliciousbackupimporter.php;h=0ceba61d89a1619b009cf771bf4e20b82b335a1a;hb=b6cfcfbcaa0459b39c5d581c103bfa031b2e02cd;hp=01b996bbe534df1103db8d9725a7bfa7bed712f6;hpb=402cac2f93d6ccb89abf0e8a314e3f69d597b898;p=quix0rs-gnu-social.git diff --git a/plugins/Bookmark/deliciousbackupimporter.php b/plugins/Bookmark/deliciousbackupimporter.php index 01b996bbe5..0ceba61d89 100644 --- a/plugins/Bookmark/deliciousbackupimporter.php +++ b/plugins/Bookmark/deliciousbackupimporter.php @@ -4,7 +4,7 @@ * Copyright (C) 2010, StatusNet, Inc. * * Importer class for Delicious.com backups - * + * * PHP version 5 * * This program is free software: you can redistribute it and/or modify @@ -44,7 +44,6 @@ if (!defined('STATUSNET')) { * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 * @link http://status.net/ */ - class DeliciousBackupImporter extends QueueHandler { /** @@ -52,7 +51,6 @@ class DeliciousBackupImporter extends QueueHandler * * @return string transport string */ - function transport() { return 'dlcsback'; @@ -65,24 +63,37 @@ class DeliciousBackupImporter extends QueueHandler * and import to StatusNet as Bookmark activities. * * The document format is terrible. It consists of a
<dl> with - * a bunch of <dt>'s, occasionally with <dd>'s. + * a bunch of <dt>'s, occasionally with <dd>'s adding descriptions. * There are sometimes <p>
's lost inside. * * @param array $data pair of user, text * * @return boolean success value */ - function handle($data) { list($user, $body) = $data; - $doc = $this->importHTML($body); + try { + $doc = $this->importHTML($body); + } catch (ClientException $cex) { + // XXX: message to the user + common_log(LOG_WARNING, $cex->getMessage()); + return true; + } + + // If we can't parse it, it's no good + + if (empty($doc)) { + return true; + } $dls = $doc->getElementsByTagName('dl'); if ($dls->length != 1) { - throw new ClientException(_("Bad import file.")); + // XXX: message to the user + common_log(LOG_WARNING, 'Bad input file'); + return true; } $dl = $dls->item(0); @@ -97,9 +108,11 @@ class DeliciousBackupImporter extends QueueHandler if ($child->nodeType != XML_ELEMENT_NODE) { continue; } - common_log(LOG_INFO, $child->tagName); switch (strtolower($child->tagName)) { case 'dt': + //

<dt> nodes contain primary information about a bookmark. + // We can't import the current one just yet though, since + // it may be followed by a <dd>
. if (!empty($dt)) { // No DD provided $this->importBookmark($user, $dt); @@ -110,15 +123,20 @@ class DeliciousBackupImporter extends QueueHandler case 'dd': $dd = $child; - $saved = $this->importBookmark($user, $dt, $dd); + if (!empty($dt)) { + // This
<dd> contains a description for the bookmark in + // the preceding <dt>
node. + $saved = $this->importBookmark($user, $dt, $dd); + } $dt = null; $dd = null; + break; case 'p': common_log(LOG_INFO, 'Skipping the

<p> in the <dl>
.'); break; default: - common_log(LOG_WARNING, + common_log(LOG_WARNING, "Unexpected element $child->tagName ". " found in import."); } @@ -127,18 +145,26 @@ class DeliciousBackupImporter extends QueueHandler $dt = $dd = null; } } + if (!empty($dt)) { + // There was a final bookmark without a description. + try { + $this->importBookmark($user, $dt); + } catch (Exception $e) { + common_log(LOG_ERR, $e->getMessage()); + } + } return true; } /** * Import a single bookmark - * + * * Takes a
<dt>/<dd> pair. The <dt> has a single * <a> in it with some non-standard attributes. - * + * * A <dt><dt><dd> sequence will appear as a <dt> with - * anothe <dt> as a child. We handle this case recursively. + * anothe <dt> as a child. We handle this case recursively. * * @param User $user User to import data as * @param DOMElement $dt <dt>
element @@ -146,27 +172,42 @@ class DeliciousBackupImporter extends QueueHandler * * @return Notice imported notice */ - function importBookmark($user, $dt, $dd = null) { - // We have to go squirrelling around in the child nodes - // on the off chance that we've received another
- // as a child. - - for ($i = 0; $i < $dt->childNodes->length; $i++) { - $child = $dt->childNodes->item($i); - if ($child->nodeType == XML_ELEMENT_NODE) { - if ($child->tagName == 'dt' && !is_null($dd)) { - $this->importBookmark($user, $dt); - $this->importBookmark($user, $child, $dd); - return; - } - } + $as = $dt->getElementsByTagName('a'); + + if ($as->length == 0) { + // TRANS: Client exception thrown when a bookmark in an import file is incorrectly formatted. + throw new ClientException(_m("No tag in a
.")); } + $a = $as->item(0); + + $private = $a->getAttribute('private'); + + if ($private != 0) { + // TRANS: Client exception thrown when a bookmark in an import file is private. + throw new ClientException(_m('Skipping private bookmark.')); + } + + if (!empty($dd)) { + $description = $dd->nodeValue; + } else { + $description = null; + } + $addDate = $a->getAttribute('add_date'); + + $data = array( + 'profile_id' => $user->id, + 'title' => $a->nodeValue, + 'description' => $description, + 'url' => $a->getAttribute('href'), + 'tags' => $a->getAttribute('tags'), + 'created' => common_sql_date(intval($addDate)) + ); + $qm = QueueManager::get(); - - $qm->enqueue(array($user, $dt, $dd), 'dlcsbkmk'); + $qm->enqueue($data, 'dlcsbkmk'); } /** @@ -189,9 +230,94 @@ class DeliciousBackupImporter extends QueueHandler error_reporting($old); if ($ok) { + foreach ($dom->getElementsByTagName('body') as $node) { + $this->fixListsIn($node); + } return $dom; } else { return null; } } + + + function fixListsIn(DOMNode $body) { + $toFix = array(); + + foreach ($body->childNodes as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + $el = strtolower($node->nodeName); + if ($el == 'dl') { + $toFix[] = $node; + } + } + } + + foreach ($toFix as $node) { + $this->fixList($node); + } + } + + function fixList(DOMNode $list) { + $toFix = array(); + + foreach ($list->childNodes as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + $el = strtolower($node->nodeName); + if ($el == 'dt' || $el == 'dd') { + $toFix[] = $node; + } + if ($el == 'dl') { + // Sublist. + // Technically, these can only appear inside a
... + $this->fixList($node); + } + } + } + + foreach ($toFix as $node) { + $this->fixListItem($node); + } + } + + function fixListItem(DOMNode $item) { + // The HTML parser in libxml2 doesn't seem to properly handle + // many cases of implied close tags, apparently because it doesn't + // understand the nesting rules specified in the HTML DTD. + // + // This leads to sequences of adjacent
<dt>s or <dd>s being incorrectly + // interpreted as parent->child trees instead of siblings: + // + // When parsing this input: "<dt>aaa <dt>bbb" + // should be equivalent to: "<dt>aaa </dt><dt>bbb</dt>" + // but we're seeing instead: "<dt>aaa <dt>bbb</dt></dt>
" + // + // It does at least know that going from dt to dd, or dd to dt, + // should make a break. + + $toMove = array(); + + foreach ($item->childNodes as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + $el = strtolower($node->nodeName); + if ($el == 'dt' || $el == 'dd') { + // dt & dd cannot contain each other; + // This node was incorrectly placed; move it up a level! + $toMove[] = $node; + } + if ($el == 'dl') { + // Sublist. + // Technically, these can only appear inside a
. + $this->fixList($node); + } + } + } + + $parent = $item->parentNode; + $next = $item->nextSibling; + foreach ($toMove as $node) { + $item->removeChild($node); + $parent->insertBefore($node, $next); + $this->fixListItem($node); + } + } }