4 * StatusNet - the distributed open-source microblogging tool
5 * Copyright (C) 2010 StatusNet, Inc.
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
// INSTALLDIR is the resolved parent directory of the directory holding this script.
21 define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
// Option specifications for -i/--id, -n/--nickname and -f/--file (each takes a value).
// NOTE(review): presumably consumed by scripts/commandline.inc, which is loaded below - confirm.
23 $shortoptions = 'i:n:f:';
24 $longoptions = array('id=', 'nickname=', 'file=');
// Usage text for this script.
// NOTE(review): where $helptext is printed is not visible in this file - confirm.
26 $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
27 importtwitteratom.php [options]
28 import an Atom feed from Twitter as notices by a user
30 -i --id ID of user to update
31 -n --nickname nickname of the user to update
32 -f --file file to import (Atom-only for now)
34 END_OF_IMPORTTWITTERATOM_HELP;
36 require_once INSTALLDIR.'/scripts/commandline.inc';
37 require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';
/**
 * Load the file named by the -f/--file option and check that it is an Atom feed.
 *
 * The path is read with get_option_value('f', 'file'). It must exist, be a
 * regular file and be readable. Its contents are parsed with
 * DOMDocument::loadXML(), and the root element must be `feed` in the
 * Activity::ATOM namespace.
 *
 * NOTE(review): no return statement is visible in this extract. The caller
 * assigns the result to $doc, so presumably the parsed document is returned -
 * confirm.
 *
 * @throws Exception if the file does not exist, is not a regular file, is not
 *                   readable, or its root element is not an Atom feed.
 */
39 function getAtomFeedDocument()
41 $filename = get_option_value('f', 'file');
// NOTE(review): what happens when no file option was given is not visible in this extract.
43 if (empty($filename)) {
// Check existence, file type and readability separately so each failure gets its own message.
48 if (!file_exists($filename)) {
49 throw new Exception("No such file '$filename'.");
52 if (!is_file($filename)) {
53 throw new Exception("Not a regular file: '$filename'.");
56 if (!is_readable($filename)) {
57 throw new Exception("File '$filename' not readable.");
60 $xml = file_get_contents($filename);
// NOTE(review): loadXML() is invoked statically, and no check of its result is
// visible before documentElement is dereferenced - confirm how parse failures are handled.
62 $dom = DOMDocument::loadXML($xml);
// Reject any document whose root is not a `feed` element in the Atom namespace.
64 if ($dom->documentElement->namespaceURI != Activity::ATOM ||
65 $dom->documentElement->localName != 'feed') {
66 throw new Exception("'$filename' is not an Atom feed.");
/**
 * Import each Atom entry of a feed document as a notice for the given user.
 *
 * For every `entry` element in the Activity::ATOM namespace, an Activity is
 * built and its first object is used. The page at the object's link is fetched
 * with getTweetHtml(), the resulting HTML is filtered with htmLawed(), a
 * plain-text version is derived from it, and Notice::saveNew() is called.
 *
 * @param object $user object whose `id` property is passed to Notice::saveNew()
 * @param object $doc  object whose `documentElement` is the feed element
 *                     (presumably a DOMDocument - confirm)
 */
72 function importActivityStream($user, $doc)
74 $feed = $doc->documentElement;
76 $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');
// Walk the entries from the last one in the document to the first.
// NOTE(review): presumably so older entries are saved first when the feed is newest-first - confirm.
78 for ($i = $entries->length - 1; $i >= 0; $i--) {
79 $entry = $entries->item($i);
80 $activity = new Activity($entry, $feed);
// Only the first object of the activity is used.
81 $object = $activity->objects[0];
// Echo the activity content unless -q/--quiet was given.
82 if (!have_option('q', 'quiet')) {
83 print $activity->content . "\n";
85 $html = getTweetHtml($object->link);
// htmLawed configuration: safe mode on, and class/rel/id/style/on* attributes denied.
87 $config = array('safe' => 1,
88 'deny_attribute' => 'class,rel,id,style,on*');
90 $html = htmLawed($html, $config);
// Plain-text form of the filtered HTML: tags stripped, entities decoded as UTF-8.
92 $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
// NOTE(review): some arguments of this call are not visible in this extract
// ($content and $html are not visibly passed) - confirm against the full file.
94 $notice = Notice::saveNew($user->id,
97 array('uri' => $object->id,
98 'url' => $object->link,
100 'created' => common_sql_date($activity->time),
101 'replies' => array(),
102 'groups' => array()));
/**
 * Fetch a tweet page over HTTP and extract the tweet HTML from it.
 *
 * The URL is requested with HTTPClient::get(). On success the response body
 * is handed to tweetHtmlFromBody() and that function's result is returned.
 *
 * NOTE(review): an error line is printed when the request throws
 * HTTP_Request2_Exception or the response is not OK, but what is returned on
 * those paths is not visible in this extract - confirm.
 *
 * @param string $url address of the tweet page to fetch
 *
 * @return mixed the result of tweetHtmlFromBody() for the fetched page body
 */
106 function getTweetHtml($url)
109 $client = new HTTPClient();
110 $response = $client->get($url);
// A failed request is reported on standard output rather than rethrown.
111 } catch (HTTP_Request2_Exception $e) {
112 print "ERROR: HTTP response " . $e->getMessage() . "\n";
// A response that is not OK is reported with its status code.
116 if (!$response->isOk()) {
117 print "ERROR: HTTP response " . $response->getCode() . "\n";
121 $body = $response->getBody();
123 return tweetHtmlFromBody($body);
/**
 * Extract the markup inside the first `span` with class "entry-content" from a tweet page.
 *
 * The page is parsed with DOMDocument::loadHTML(). Each child node of the
 * matched span is serialized with saveXML() and appended to $text. Any `a`
 * element whose href does not start with http:// or https:// first gets
 * 'http://twitter.com' prepended to that href.
 *
 * NOTE(review): the initialisation of $text, the return statement and the
 * value produced when no span matches are not visible in this extract - confirm.
 *
 * @param string $body HTML source of the tweet page
 */
126 function tweetHtmlFromBody($body)
// NOTE(review): loadHTML() is invoked statically here - confirm this is supported by the target PHP version.
128 $doc = DOMDocument::loadHTML($body);
129 $xpath = new DOMXPath($doc);
// The class attribute must equal "entry-content" exactly for this query to match.
131 $spans = $xpath->query('//span[@class="entry-content"]');
133 if ($spans->length == 0) {
134 print "ERROR: No content in tweet page.\n";
// Only the first matching span is used.
138 $span = $spans->item(0);
140 $children = $span->childNodes;
144 for ($i = 0; $i < $children->length; $i++) {
145 $child = $children->item($i);
// Turn links without an http(s) scheme into absolute twitter.com URLs.
146 if ($child instanceof DOMElement &&
147 $child->tagName == 'a' &&
148 !preg_match('#^https?://#', $child->getAttribute('href'))) {
149 $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
// Append the serialized child, element or text node alike.
151 $text .= $doc->saveXML($child);
// Script entry point: load the Atom feed named on the command line and import it.
// NOTE(review): the opening of the try block and the assignment of $user are
// not visible in this extract - confirm against the full file.
159 $doc = getAtomFeedDocument();
162 importActivityStream($user, $doc);
// Any exception raised above is reported by printing its message.
164 } catch (Exception $e) {
165 print $e->getMessage()."\n";