4 * StatusNet - the distributed open-source microblogging tool
5 * Copyright (C) 2010 StatusNet, Inc.
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
// Root of the installation: the resolved parent of the directory that holds this script.
21 define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
// Short and long option specs for this script (id, nickname, file).
// NOTE(review): presumably read by scripts/commandline.inc, which is required below -- confirm.
23 $shortoptions = 'i:n:f:';
24 $longoptions = array('id=', 'nickname=', 'file=');
// Usage text describing the three options above.
26 $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
27 importtwitteratom.php [options]
28 import an Atom feed from Twitter as notices by a user
30 -i --id ID of user to update
31 -n --nickname nickname of the user to update
32 -f --file file to import (Atom-only for now)
34 END_OF_IMPORTTWITTERATOM_HELP;
// NOTE(review): presumably provides get_option_value() and have_option(), which the
// functions below call but this file does not define -- confirm.
36 require_once INSTALLDIR.'/scripts/commandline.inc';
/**
 * Read the file named by the -f/--file option and parse it as XML.
 *
 * Throws an Exception when the path does not exist, is not a regular file,
 * is not readable, or when the document root is not a "feed" element in
 * the Activity::ATOM namespace.
 *
 * NOTE(review): some lines of this function are not visible in this excerpt,
 * including the body of the empty-filename branch and the return statement.
 * The caller at the bottom of the script passes the result on as the feed
 * document, so the parsed document is presumably what gets returned -- confirm.
 */
38 function getAtomFeedDocument()
40 $filename = get_option_value('f', 'file');
// No file option supplied. NOTE(review): the handling inside this branch is not visible here.
42 if (empty($filename)) {
// Check the path step by step so that each failure gets its own message.
47 if (!file_exists($filename)) {
48 throw new Exception("No such file '$filename'.");
51 if (!is_file($filename)) {
52 throw new Exception("Not a regular file: '$filename'.");
55 if (!is_readable($filename)) {
56 throw new Exception("File '$filename' not readable.");
// NOTE(review): the result of file_get_contents() is used without a check for false.
59 $xml = file_get_contents($filename);
// NOTE(review): loadXML() is an instance method that is called statically here.
// The PHP manual changelog lists this as an Error from PHP 8.0 (deprecated before);
// confirm the target PHP version before relying on this call.
61 $dom = DOMDocument::loadXML($xml);
// Reject any document whose root element is not an Atom feed.
63 if ($dom->documentElement->namespaceURI != Activity::ATOM ||
64 $dom->documentElement->localName != 'feed') {
65 throw new Exception("'$filename' is not an Atom feed.");
/**
 * Save one notice per Atom entry found in the feed document.
 *
 * For every entry an Activity is built, the page at the link of its first
 * object is fetched through getTweetHtml(), the result is passed through
 * common_purify(), and a tag-stripped, entity-decoded copy is derived from
 * it before Notice::saveNew() is called with the user id.
 *
 * @param $user user object; only its id property is read in the visible code
 * @param $doc  feed document; its documentElement is used as the feed
 *
 * NOTE(review): part of the Notice::saveNew() argument list is not visible in
 * this excerpt, so how $content and $html are passed cannot be confirmed here.
 */
71 function importActivityStream($user, $doc)
73 $feed = $doc->documentElement;
75 $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');
// Walk the entries from the last one in the document back to the first.
// NOTE(review): presumably the feed lists newest entries first, so this saves
// the oldest notice first -- confirm.
77 for ($i = $entries->length - 1; $i >= 0; $i--) {
78 $entry = $entries->item($i);
79 $activity = new Activity($entry, $feed);
// Only the first object of the activity is used.
80 $object = $activity->objects[0];
// Echo the activity content unless the q/quiet option is set.
81 if (!have_option('q', 'quiet')) {
82 print $activity->content . "\n";
// Fetch the page behind the object link and purify the extracted markup.
84 $html = common_purify(getTweetHtml($object->link));
// Plain-text form: strip the tags first, then decode HTML entities as UTF-8.
86 $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
// The original object id and link are kept as the uri and url of the notice.
88 $notice = Notice::saveNew($user->id,
91 array('uri' => $object->id,
92 'url' => $object->link,
94 'created' => common_sql_date($activity->time),
96 'groups' => array()));
/**
 * Fetch the page at the given URL with HTTPClient and hand its body to
 * tweetHtmlFromBody().
 *
 * An "ERROR: HTTP response ..." line is printed when the client throws or
 * when isOk() on the response is false.
 *
 * @param $url address of the page to fetch
 *
 * @return on success, whatever tweetHtmlFromBody() returns for the body
 *
 * NOTE(review): the opening of the try block and the statements that end
 * each error branch are not visible in this excerpt, so the value returned
 * on failure cannot be confirmed from here.
 */
100 function getTweetHtml($url)
103 $client = new HTTPClient();
104 $response = $client->get($url);
// Request failed with an exception: report its message.
105 } catch (Exception $e) {
106 print "ERROR: HTTP response " . $e->getMessage() . "\n";
// A response arrived but is not OK: report its code.
110 if (!$response->isOk()) {
111 print "ERROR: HTTP response " . $response->getCode() . "\n";
115 $body = $response->getBody();
117 return tweetHtmlFromBody($body);
/**
 * Extract the markup of a tweet from the HTML of a tweet page.
 *
 * The body is parsed as HTML and the first span whose class attribute is
 * exactly "entry-content" is selected. Each child node of that span is
 * serialised with saveXML() and appended to $text. Before serialising, any
 * "a" element whose href does not start with http:// or https:// gets
 * "http://twitter.com" prefixed to that href.
 *
 * @param $body HTML source of the page
 *
 * NOTE(review): the initialisation of $text and the return statements are
 * not visible in this excerpt; presumably the accumulated $text is the
 * result -- confirm.
 */
120 function tweetHtmlFromBody($body)
// NOTE(review): loadHTML() is an instance method that is called statically here.
// The PHP manual changelog lists this as an Error from PHP 8.0 (deprecated before);
// confirm the target PHP version before relying on this call.
122 $doc = DOMDocument::loadHTML($body);
123 $xpath = new DOMXPath($doc);
// The attribute test is an exact string match, so a span carrying additional
// class names will not be selected.
125 $spans = $xpath->query('//span[@class="entry-content"]');
// No matching span: report it. NOTE(review): the rest of this branch is not visible here.
127 if ($spans->length == 0) {
128 print "ERROR: No content in tweet page.\n";
// Only the first matching span is used.
132 $span = $spans->item(0);
134 $children = $span->childNodes;
// Serialise every child node, fixing up relative links on the way.
138 for ($i = 0; $i < $children->length; $i++) {
139 $child = $children->item($i);
140 if ($child instanceof DOMElement &&
141 $child->tagName == 'a' &&
142 !preg_match('#^https?://#', $child->getAttribute('href'))) {
143 $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
145 $text .= $doc->saveXML($child);
// Script entry point: load the Atom feed document, then import it for $user.
// NOTE(review): the opening of the try block and the assignment of $user are
// not visible in this excerpt.
153 $doc = getAtomFeedDocument();
156 importActivityStream($user, $doc);
// Any exception raised above is reported by printing its message.
158 } catch (Exception $e) {
159 print $e->getMessage()."\n";