* and import to StatusNet as Bookmark activities.
*
* The document format is terrible. It consists of a <dl> with
- * a bunch of <dt>'s, occasionally with <dd>'s.
+ * a bunch of <dt>'s, occasionally with <dd>'s adding descriptions.
* There are sometimes <p>'s lost inside.
*
* @param array $data pair of user, text
}
switch (strtolower($child->tagName)) {
case 'dt':
+ // <dt> nodes contain primary information about a bookmark.
+ // We can't import the current one just yet though, since
+ // it may be followed by a <dd>.
if (!empty($dt)) {
// No DD provided
$this->importBookmark($user, $dt);
case 'dd':
$dd = $child;
+ // This <dd> contains a description for the bookmark in
+ // the preceding <dt> node.
$saved = $this->importBookmark($user, $dt, $dd);
$dt = null;
$dd = null;
+ break;
case 'p':
common_log(LOG_INFO, 'Skipping the <p> in the <dl>.');
break;
$dt = $dd = null;
}
}
+ if (!empty($dt)) {
+ // There was a final bookmark without a description.
+ try {
+ $this->importBookmark($user, $dt);
+ } catch (Exception $e) {
+ common_log(LOG_ERR, $e->getMessage());
+ }
+ }
return true;
}
function importBookmark($user, $dt, $dd = null)
{
- // We have to go squirrelling around in the child nodes
- // on the off chance that we've received another <dt>
- // as a child.
-
- for ($i = 0; $i < $dt->childNodes->length; $i++) {
- $child = $dt->childNodes->item($i);
- if ($child->nodeType == XML_ELEMENT_NODE) {
- if ($child->tagName == 'dt' && !is_null($dd)) {
- $this->importBookmark($user, $dt);
- $this->importBookmark($user, $child, $dd);
- return;
- }
- }
- }
+ $as = $dt->getElementsByTagName('a'); // all <a> descendants of this <dt>
+ // A <dt> without any <a> cannot be imported and is rejected.
+ if ($as->length == 0) {
+ throw new ClientException(_("No <A> tag in a <DT>."));
+ }
+ // Only the first <a> is used; its attributes and text describe the bookmark.
+ $a = $as->item(0);
+
+ $private = $a->getAttribute('private');
+ // NOTE(review): loose comparison -- getAttribute() yields '' when 'private' is absent, and '' != 0 evaluates differently on PHP 8+; confirm intent.
+ if ($private != 0) {
+ throw new ClientException(_('Skipping private bookmark.'));
+ }
+ // The optional <dd> supplies the description text.
+ if (!empty($dd)) {
+ $description = $dd->nodeValue;
+ } else {
+ $description = null;
}
+ $addDate = $a->getAttribute('add_date'); // presumably a Unix timestamp (it goes through intval() below) -- confirm
+ // Queue a flat array of values instead of the user object and the DOM nodes.
+ $data = array(
+ 'profile_id' => $user->id,
+ 'title' => $a->nodeValue,
+ 'description' => $description,
+ 'url' => $a->getAttribute('href'),
+ 'tags' => $a->getAttribute('tags'),
+ 'created' => common_sql_date(intval($addDate))
+ );
$qm = QueueManager::get();
-
- $qm->enqueue(array($user, $dt, $dd), 'dlcsbkmk');
+ $qm->enqueue($data, 'dlcsbkmk');
}
/**
error_reporting($old);
if ($ok) {
+ foreach ($dom->getElementsByTagName('body') as $node) {
+ $this->fixListsIn($node);
+ }
return $dom;
} else {
return null;
}
}
+
+ // Repair every <dl> that is a direct child element of $body (each one is handed to fixList()).
+ function fixListsIn(DOMNode $body) {
+ $toFix = array();
+
+ foreach ($body->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName); // lower-cased so the tag match is case-insensitive
+ if ($el == 'dl') {
+ $toFix[] = $node;
+ }
+ }
+ }
+ // The fixes run only after the scan of $body's children has finished.
+ foreach ($toFix as $node) {
+ $this->fixList($node);
+ }
+ }
+ // Repair one <dl>: fix each direct <dt>/<dd> child and recurse into any directly nested <dl>.
+ function fixList(DOMNode $list) {
+ $toFix = array(); // direct <dt>/<dd> children, collected during the scan below
+
+ foreach ($list->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName);
+ if ($el == 'dt' || $el == 'dd') {
+ $toFix[] = $node;
+ }
+ if ($el == 'dl') {
+ // Sublist.
+ // Technically, these can only appear inside a <dd>...
+ $this->fixList($node);
+ }
+ }
+ }
+ // Items are fixed after the scan, because fixListItem() inserts new siblings into this list.
+ foreach ($toFix as $node) {
+ $this->fixListItem($node);
+ }
+ }
+ // Hoist <dt>/<dd> elements that were wrongly nested inside $item so they become its following siblings.
+ function fixListItem(DOMNode $item) {
+ // The HTML parser in libxml2 doesn't seem to properly handle
+ // many cases of implied close tags, apparently because it doesn't
+ // understand the nesting rules specified in the HTML DTD.
+ //
+ // This leads to sequences of adjacent <dt>s or <dd>s being incorrectly
+ // interpreted as parent->child trees instead of siblings:
+ //
+ // When parsing this input: "<dt>aaa <dt>bbb"
+ // should be equivalent to: "<dt>aaa </dt><dt>bbb</dt>"
+ // but we're seeing instead: "<dt>aaa <dt>bbb</dt></dt>"
+ //
+ // It does at least know that going from dt to dd, or dd to dt,
+ // should make a break.
+
+ $toMove = array(); // collected first; $item's own children are only moved after the scan
+
+ foreach ($item->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName);
+ if ($el == 'dt' || $el == 'dd') {
+ // dt & dd cannot contain each other;
+ // This node was incorrectly placed; move it up a level!
+ $toMove[] = $node;
+ }
+ if ($el == 'dl') {
+ // Sublist.
+ // Technically, these can only appear inside a <dd>.
+ $this->fixList($node);
+ }
+ }
+ }
+ // Re-insert each misplaced node between $item and its original next sibling, keeping their order.
+ $parent = $item->parentNode;
+ $next = $item->nextSibling;
+ foreach ($toMove as $node) {
+ $item->removeChild($node);
+ $parent->insertBefore($node, $next);
+ $this->fixListItem($node); // the moved node may itself hold wrongly nested items
+ }
+ }
+
}
/**
* Handle the data
*
- * @param array $data array of user, dt, dd
+ * @param array $data associative array of user & bookmark info from DeliciousBackupImporter::importBookmark()
*
* @return boolean success value
*/
function handle($data)
{
- list($user, $dt, $dd) = $data;
+ $profile = Profile::staticGet('id', $data['profile_id']); // NOTE(review): result is passed to saveNew() unchecked -- confirm a missing profile cannot occur here
- $as = $dt->getElementsByTagName('a');
-
- if ($as->length == 0) {
- throw new ClientException(_("No <A> tag in a <DT>."));
- }
-
- $a = $as->item(0);
-
- $private = $a->getAttribute('private');
-
- if ($private != 0) {
- throw new ClientException(_('Skipping private bookmark.'));
+ try {
+ $saved = Bookmark::saveNew($profile,
+ $data['title'],
+ $data['url'],
+ $data['tags'],
+ $data['description'],
+ array('created' => $data['created'],
+ 'distribute' => false));
+ } catch (ClientException $e) {
+ // Most likely a duplicate -- continue on with the rest!
+ common_log(LOG_ERR, "Error importing delicious bookmark to $data[url]: " . $e->getMessage());
+ return true; // the failure is logged but still reported as handled
}
- if (!empty($dd)) {
- $description = $dd->nodeValue;
- } else {
- $description = null;
- }
-
- $title = $a->nodeValue;
- $url = $a->getAttribute('href');
- $tags = $a->getAttribute('tags');
- $addDate = $a->getAttribute('add_date');
- $created = common_sql_date(intval($addDate));
-
- $saved = Bookmark::saveNew($user->getProfile(),
- $title,
- $url,
- $tags,
- $description,
- array('created' => $created,
- 'distribute' => false));
-
return true;
}
}
class ImportdeliciousAction extends Action
{
protected $success = false;
+ private $inprogress = false;
/**
* Return the title of the page
$qm = QueueManager::get();
$qm->enqueue(array(common_current_user(), $html), 'dlcsback');
- $this->success = true;
+ if ($qm instanceof UnQueueManager) {
+ // No active queuing means we've actually just completed the job!
+ $this->success = true;
+ } else {
+ // We've fed data into background queues, and it's probably still running.
+ $this->inprogress = true;
+ }
$this->showPage();
{
if ($this->success) {
$this->element('p', null,
- _('Feed will be restored. '.
- 'Please wait a few minutes for results.'));
+ _('Bookmarks have been imported. Your bookmarks should now appear in search and your profile page.'));
+ } else if ($this->inprogress) {
+ $this->element('p', null,
+ _('Bookmarks are being imported. Please wait a few minutes for results.'));
} else {
$form = new ImportDeliciousForm($this);
$form->show();