4 * StatusNet - the distributed open-source microblogging tool
5 * Copyright (C) 2010 StatusNet, Inc.
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 define('INSTALLDIR', dirname(__DIR__));
22 define('PUBLICDIR', INSTALLDIR . DIRECTORY_SEPARATOR . 'public');
24 $shortoptions = 'i:n:f:';
25 $longoptions = array('id=', 'nickname=', 'file=');
27 $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
28 importtwitteratom.php [options]
29 import an Atom feed from Twitter as notices by a user
31 -i --id ID of user to update
32 -n --nickname nickname of the user to update
33 -f --file file to import (Atom-only for now)
35 END_OF_IMPORTTWITTERATOM_HELP;
37 require_once INSTALLDIR.'/scripts/commandline.inc';
/**
 * Load and validate the Atom feed named by the -f / --file option.
 *
 * The file must exist, be a regular readable file, parse as XML, and have
 * a root element named "feed" in the Activity::ATOM namespace.
 *
 * @return DOMDocument the parsed feed document
 *
 * @throws Exception if the file is missing, not a regular file, unreadable,
 *                   or does not contain an Atom feed
 */
function getAtomFeedDocument()
{
    $filename = get_option_value('f', 'file');

    if (empty($filename)) {
        // NOTE(review): the body of this branch was missing from the reviewed
        // extract; show_help()/exit(1) is the reconstruction implied by the
        // $helptext defined at the top of the script -- confirm against the
        // upstream file.
        show_help();
        exit(1);
    }

    if (!file_exists($filename)) {
        throw new Exception("No such file '$filename'.");
    }

    if (!is_file($filename)) {
        throw new Exception("Not a regular file: '$filename'.");
    }

    if (!is_readable($filename)) {
        throw new Exception("File '$filename' not readable.");
    }

    $xml = file_get_contents($filename);

    if ($xml === false) {
        // The file can still vanish or become unreadable between the checks
        // above and the actual read.
        throw new Exception("File '$filename' not readable.");
    }

    if ($xml === '') {
        // loadXML() raises a ValueError (not an Exception) for empty input
        // on PHP 8, which callers catching Exception would not see.
        throw new Exception("'$filename' is not an Atom feed.");
    }

    // loadXML() is an instance method; calling it statically is a fatal
    // error on PHP 8.
    $dom = new DOMDocument();

    if (!$dom->loadXML($xml) || $dom->documentElement === null) {
        throw new Exception("'$filename' is not an Atom feed.");
    }

    if ($dom->documentElement->namespaceURI != Activity::ATOM ||
        $dom->documentElement->localName != 'feed') {
        throw new Exception("'$filename' is not an Atom feed.");
    }

    return $dom;
}
/**
 * Save every entry of an Atom feed as a notice belonging to $user.
 *
 * For each entry the tweet's own page is fetched (via getTweetHtml()) to
 * obtain the rendered markup; the plain-text notice content is derived from
 * that markup by stripping tags and decoding entities.
 *
 * @param object      $user user whose id is passed to Notice::saveNew()
 * @param DOMDocument $doc  parsed Atom feed
 *
 * @return void
 */
function importActivityStream($user, $doc)
{
    $feed = $doc->documentElement;

    $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');

    // Entries are processed from the last one in the document to the first
    // (presumably because the feed lists newest entries first -- confirm).
    for ($i = $entries->length - 1; $i >= 0; $i--) {
        $entry    = $entries->item($i);
        $activity = new Activity($entry, $feed);

        // An entry without any activity object has no link to fetch and no
        // id to record; skip it instead of reading an undefined index.
        if (empty($activity->objects)) {
            print "ERROR: No object in entry.\n";
            continue;
        }

        $object = $activity->objects[0];

        if (!have_option('q', 'quiet')) {
            print $activity->content . "\n";
        }

        $rawHtml = getTweetHtml($object->link);

        // getTweetHtml() prints an ERROR line when the fetch or the page
        // parse fails; do not store an empty notice for such an entry.
        if (empty($rawHtml)) {
            continue;
        }

        $html = common_purify($rawHtml);

        $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');

        // NOTE(review): four short argument lines of this call were missing
        // from the reviewed extract. $content is implied by the otherwise
        // unused local above; the 'importtwitter' source label and the
        // 'rendered' / 'replies' options are reconstructed -- confirm against
        // the upstream file before merging.
        Notice::saveNew($user->id,
                        $content,
                        'importtwitter',
                        array('uri' => $object->id,
                              'url' => $object->link,
                              'rendered' => $html,
                              'created' => common_sql_date($activity->time),
                              'replies' => array(),
                              'groups' => array()));
    }
}
/**
 * Download a tweet's page and return the markup of the tweet itself.
 *
 * Failures are reported on standard output rather than thrown.
 *
 * @param string $url address of the tweet page
 *
 * @return mixed result of tweetHtmlFromBody() on success, false when the
 *               request throws or the response is not OK
 */
function getTweetHtml($url)
{
    $reply = null;

    try {
        $http  = new HTTPClient();
        $reply = $http->get($url);
    } catch (Exception $e) {
        print "ERROR: HTTP response " . $e->getMessage() . "\n";
        // NOTE(review): the failure returns were missing from the reviewed
        // extract; `false` is the reconstructed value -- confirm upstream.
        return false;
    }

    if ($reply->isOk()) {
        return tweetHtmlFromBody($reply->getBody());
    }

    print "ERROR: HTTP response " . $reply->getCode() . "\n";

    return false;
}
/**
 * Extract the markup of a tweet from the HTML of its page.
 *
 * The tweet is taken to be the children of the first
 * <span class="entry-content"> element. Links whose href does not start
 * with http:// or https:// are prefixed with "http://twitter.com".
 *
 * @param string $body HTML source of the tweet page
 *
 * @return string serialized child nodes of the tweet span, or '' (after
 *                printing an ERROR line) when the page has no such span
 */
function tweetHtmlFromBody($body)
{
    // loadHTML() raises a ValueError on PHP 8 (a warning before that) for
    // empty input, so treat an empty page as "no content" up front.
    if (!is_string($body) || trim($body) === '') {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    // loadHTML() is an instance method; calling it statically is a fatal
    // error on PHP 8.
    $doc = new DOMDocument();

    // Real-world pages are rarely valid HTML; keep libxml's parse warnings
    // out of the script output, then restore the caller's setting.
    $previous = libxml_use_internal_errors(true);
    $loaded   = $doc->loadHTML($body);
    if (!$previous) {
        // Only discard the error buffer when this function enabled it.
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    $spans = false;

    if ($loaded) {
        $xpath = new DOMXPath($doc);
        $spans = $xpath->query('//span[@class="entry-content"]');
    }

    if ($spans === false || $spans->length == 0) {
        print "ERROR: No content in tweet page.\n";
        // NOTE(review): the return on this path was missing from the
        // reviewed extract; '' is the reconstructed value -- confirm upstream.
        return '';
    }

    $text = '';

    foreach ($spans->item(0)->childNodes as $child) {
        if ($child instanceof DOMElement && $child->tagName == 'a') {
            $href = $child->getAttribute('href');

            // Site-relative links (mentions, hashtags) need the host added.
            if (!preg_match('#^https?://#', $href)) {
                $child->setAttribute('href', 'http://twitter.com' . $href);
            }
        }

        $text .= $doc->saveXML($child);
    }

    return $text;
}
154 $doc = getAtomFeedDocument();
157 importActivityStream($user, $doc);
159 } catch (Exception $e) {
160 print $e->getMessage()."\n";