4 * StatusNet - the distributed open-source microblogging tool
5 * Copyright (C) 2008-2010, StatusNet, Inc.
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
// Installation root, resolved three directory levels above this script's directory.
21 define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));
23 // Tune number of processes and how often to poll Twitter
24 // XXX: Should these things be in config.php?
// NOTE(review): the daemon is constructed at the bottom of this script with the
// literals 60 and 2 rather than these two constants, so editing them here looks
// like it has no effect - confirm and pass the constants through instead.
25 define('MAXCHILDREN', 2);
26 define('POLL_INTERVAL', 60); // in seconds
// Option specifications consumed by the included command-line helper:
// short options "d" and "i", long options "id" and "debug".
// Presumably getopt-style, where "::" marks an optional value - TODO confirm.
28 $shortoptions = 'di::';
29 $longoptions = array('id::', 'debug');
31 $helptext = <<<END_OF_TRIM_HELP
32 Batch script for retrieving Twitter messages from foreign service.
34 -i --id Identity (default 'generic')
35 -d --debug Debug (lots of log output)
39 require_once INSTALLDIR . '/scripts/commandline.inc';
40 require_once INSTALLDIR . '/lib/common.php';
41 require_once INSTALLDIR . '/lib/daemon.php';
42 require_once INSTALLDIR . '/plugins/TwitterBridge/twitter.php';
43 require_once INSTALLDIR . '/plugins/TwitterBridge/twitteroauthclient.php';
46 * Fetch statuses from Twitter
48 * Fetches statuses from Twitter and inserts them as notices
50 * NOTE: an Avatar path MUST be set in config.php for this
51 * script to work, e.g.:
52 * $config['avatar']['path'] = $config['site']['path'] . '/avatar/';
54 * @todo @fixme @gar Fix the above. For some reason $_path is always empty when
55 * this script is run, so the default avatar path is always set wrong in
56 * default.php. Therefore it must be set explicitly in config.php. --Z
60 * @author Zach Copley <zach@status.net>
61 * @author Evan Prodromou <evan@status.net>
62 * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
63 * @link http://status.net/
65 class TwitterStatusFetcher extends ParallelizingDaemon
70 * @param string $id the name/id of this daemon
71 * @param int $interval sleep this long before doing everything again
72 * @param int $max_children maximum number of child processes at a time
73 * @param boolean $debug debug output flag
/**
 * Create the fetcher daemon; every argument is forwarded unchanged to the
 * parent daemon constructor.
 *
 * @param string  $id           the name/id of this daemon
 * @param int     $interval     sleep this long before doing everything again
 * @param int     $max_children maximum number of child processes at a time
 * @param boolean $debug        debug output flag
 */
function __construct(
    $id = null,
    $interval = 60,
    $max_children = 2,
    $debug = null
) {
    parent::__construct($id, $interval, $max_children, $debug);
}
87 * @return string Name of the daemon.
91 return ('twitterstatusfetcher.'.$this->_id);
95 * Find all the Twitter foreign links for users who have requested
96 * importing of their friends' timelines
98 * @return array flinks an array of Foreign_link objects
// Walks the Foreign_link rows for the Twitter service, ordered by
// last_noticesync, and collects a clone of every link whose noticesync
// bitmask has FOREIGN_NOTICE_RECV set.
100 function getObjects()
102 global $_DB_DATAOBJECT;
103 $flink = new Foreign_link();
104 $conn = &$flink->getDatabaseConnection();
// Restrict the query to Twitter links, least recently synced first.
106 $flink->service = TWITTER_SERVICE;
107 $flink->orderBy('last_noticesync');
112 while ($flink->fetch()) {
// Bitmask test: only links with the "receive notices" flag are queued.
114 if (($flink->noticesync & FOREIGN_NOTICE_RECV) ==
115 FOREIGN_NOTICE_RECV) {
// Clone because $flink is reused as the cursor for the next fetch().
116 $flinks[] = clone($flink);
117 common_log(LOG_INFO, "sync: foreign id $flink->foreign_id");
119 common_log(LOG_INFO, "nothing to sync");
// Forget DB_DataObject's cached connections; presumably so that child
// processes open their own connection (see childTask) - TODO confirm.
127 unset($_DB_DATAOBJECT['CONNECTIONS']);
// Work done for a single Foreign_link: obtain a database connection, import
// the link's timeline, stamp the link, then discard the cached connections.
132 function childTask($flink) {
133 // Each child ps needs its own DB connection
135 // Note: DataObject::getDatabaseConnection() creates
136 // a new connection if there isn't one already
137 $conn = &$flink->getDatabaseConnection();
139 $this->getTimeline($flink);
// NOTE(review): this stamps last_friendsync, whereas getTimeline() stamps
// last_noticesync and getObjects() orders by last_noticesync - confirm that
// last_friendsync is really the intended field here.
141 $flink->last_friendsync = common_sql_now();
146 // XXX: Couldn't find a less brutal way to blow
147 // away a cached connection
148 global $_DB_DATAOBJECT;
149 unset($_DB_DATAOBJECT['CONNECTIONS']);
// Imports the home timeline of one linked Twitter account: builds an OAuth
// client from the link's packed credentials, requests statuses newer than the
// stored last id, saves each one as a notice, delivers it to the linked
// user's inbox, and records the new last id and sync time.
152 function getTimeline($flink)
// NOTE(review): $fid is not assigned anywhere in the visible part of this
// function, so this message would log an empty id - confirm.
155 common_log(LOG_WARNING, $this->name() .
156 " - Can't retrieve Foreign_link for foreign ID $fid");
160 common_debug($this->name() . ' - Trying to get timeline for Twitter user ' .
// Only OAuth credentials are supported; other links are skipped.
165 if (TwitterOAuthClient::isPackedToken($flink->credentials)) {
166 $token = TwitterOAuthClient::unpackToken($flink->credentials);
167 $client = new TwitterOAuthClient($token->key, $token->secret);
168 common_debug($this->name() . ' - Grabbing friends timeline with OAuth.');
170 common_debug("Skipping friends timeline for $flink->foreign_id since not OAuth.");
// Resume from the last status id recorded for this account's home timeline.
175 $lastId = Twitter_synch_status::getLastId($flink->foreign_id, 'home_timeline');
177 common_debug("Got lastId value '{$lastId}' for foreign id '{$flink->foreign_id}' and timeline 'home_timeline'");
180 $timeline = $client->statusesHomeTimeline($lastId);
181 } catch (Exception $e) {
// NOTE(review): 'msg: ' has no leading space, so the code and the message
// run together in the log line.
182 common_log(LOG_WARNING, $this->name() .
183 ' - Twitter client unable to get friends timeline for user ' .
184 $flink->user_id . ' - code: ' .
185 $e->getCode() . 'msg: ' . $e->getMessage());
188 if (empty($timeline)) {
189 common_log(LOG_WARNING, $this->name() . " - Empty timeline.");
// NOTE(review): every other common_debug() call in this file passes the
// message as the first argument; here LOG_INFO is passed first, so the
// priority constant is probably being logged instead of the text. This looks
// like it was meant to be common_log(LOG_INFO, ...) or common_debug(<message>).
193 common_debug(LOG_INFO, $this->name() . ' - Retrieved ' . sizeof($timeline) . ' statuses from Twitter.');
195 // Reverse to preserve order
197 foreach (array_reverse($timeline) as $status) {
198 // Hacktastic: filter out stuff coming from this StatusNet
199 $source = mb_strtolower(common_config('integration', 'source'));
// NOTE(review): $source is interpolated into the pattern without
// preg_quote(), so regex metacharacters or a "/" in the configured value
// would change or break the match.
201 if (preg_match("/$source/", mb_strtolower($status->source))) {
202 common_debug($this->name() . ' - Skipping import of status ' .
203 $status->id . ' with source ' . $source);
207 // Don't save it if the user is protected
208 // FIXME: save it but treat it as private
209 if ($status->user->protected) {
213 $notice = $this->saveStatus($status);
// Deliver the imported notice to the inbox of the local user owning the link.
215 if (!empty($notice)) {
216 Inbox::insertNotice($flink->user_id, $notice->id);
// The first element of the (un-reversed) timeline supplies the id stored as
// the new resume point.
220 if (!empty($timeline)) {
221 $lastId = twitter_id($timeline[0]);
222 Twitter_synch_status::setLastId($flink->foreign_id, 'home_timeline', $lastId);
223 common_debug("Set lastId value '$lastId' for foreign id '{$flink->foreign_id}' and timeline 'home_timeline'");
226 // Okay, record the time we synced with Twitter for posterity
227 $flink->last_noticesync = common_sql_now();
// Saves one Twitter status as a local Notice. Ensures a local profile for the
// author, returns the existing notice when the status was already imported,
// stores retweets as repeats of the (recursively saved) original, links
// replies to known local notices, and records the notice<->status mapping.
231 function saveStatus($status)
233 $profile = $this->ensureProfile($status->user);
235 if (empty($profile)) {
236 common_log(LOG_ERR, $this->name() .
237 ' - Problem saving notice. No associated Profile.');
241 $statusUri = $this->makeStatusURI($status->user->screen_name, $status->id);
243 // check to see if we've already imported the status
244 $n2s = Notice_to_status::staticGet('status_id', $status->id);
250 " - Ignoring duplicate import: {$status->id}"
252 return Notice::staticGet('id', $n2s->notice_id);
255 // If it's a retweet, save it as a repeat!
256 if (!empty($status->retweeted_status)) {
257 common_log(LOG_INFO, "Status {$status->id} is a retweet of {$status->retweeted_status->id}.");
// Recursive call: the retweeted status is imported first so the repeat can
// reference it.
258 $original = $this->saveStatus($status->retweeted_status);
259 if (empty($original)) {
262 $author = $original->getProfile();
263 // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'.
264 // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice.
265 $content = sprintf(_m('RT @%1$s %2$s'),
// Over-long repeat text is cut four characters short of the limit and the
// four-character marker ' ...' is appended.
269 if (Notice::contentTooLong($content)) {
270 $contentlimit = Notice::maxContent();
271 $content = mb_substr($content, 0, $contentlimit - 4) . ' ...';
274 $repeat = Notice::saveNew($profile->id,
277 array('repeat_of' => $original->id,
279 'is_local' => Notice::GATEWAY));
280 common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}");
281 Notice_to_status::saveNew($repeat->id, $status->id);
// Ordinary (non-retweet) status: build the Notice by hand.
286 $notice = new Notice();
288 $notice->profile_id = $profile->id;
289 $notice->uri = $statusUri;
290 $notice->url = $statusUri;
291 $notice->created = strftime(
293 strtotime($status->created_at)
296 $notice->source = 'twitter';
298 $notice->reply_to = null;
// Thread the notice under its parent when the parent status is already known
// locally; the conversation id is inherited from that parent.
300 if (!empty($status->in_reply_to_status_id)) {
301 common_log(LOG_INFO, "Status {$status->id} is a reply to status {$status->in_reply_to_status_id}");
302 $n2s = Notice_to_status::staticGet('status_id', $status->in_reply_to_status_id);
304 common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}");
306 $reply = Notice::staticGet('id', $n2s->notice_id);
308 common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}");
310 common_log(LOG_INFO, "Found local notice {$reply->id} for status {$status->in_reply_to_status_id}");
311 $notice->reply_to = $reply->id;
312 $notice->conversation = $reply->conversation;
// No conversation inherited from a parent: start a new one.
317 if (empty($notice->conversation)) {
318 $conv = Conversation::create();
319 $notice->conversation = $conv->id;
320 common_log(LOG_INFO, "No known conversation for status {$status->id} so making a new one {$conv->id}.");
323 $notice->is_local = Notice::GATEWAY;
// Plain content has HTML entities decoded; the rendered form is HTML built
// by linkify().
325 $notice->content = html_entity_decode($status->text, ENT_QUOTES, 'UTF-8');
326 $notice->rendered = $this->linkify($status);
328 if (Event::handle('StartNoticeSave', array(&$notice))) {
330 $id = $notice->insert();
333 common_log_db_error($notice, 'INSERT', __FILE__);
334 common_log(LOG_ERR, $this->name() .
335 ' - Problem saving notice.');
338 Event::handle('EndNoticeSave', array($notice));
// Remember which Twitter status this notice came from (used by the duplicate
// and reply lookups above), then store mentions and refresh caches.
341 Notice_to_status::saveNew($notice->id, $status->id);
343 $this->saveStatusMentions($notice, $status);
345 $notice->blowOnInsert();
351 * Make an URI for a status.
353 * @param string $username screen name of the status author
 * @param mixed $id ID of the status
// Builds the twitter.com URI of a status from the author's screen name and
// the status id; saveStatus() uses the result as both the notice URI and URL.
357 function makeStatusURI($username, $id)
359 return 'http://twitter.com/'
366 * Look up a Profile by profileurl field. Profile::staticGet() was
367 * not working consistently.
369 * @param string $nickname local nickname of the Twitter user
370 * @param string $profileurl the profile url
372 * @return mixed value the first Profile with that url, or null
// Looks up a Profile matching both the given nickname and profile URL by
// setting them as query fields and running find().
374 function getProfileByUrl($nickname, $profileurl)
376 $profile = new Profile();
377 $profile->nickname = $nickname;
378 $profile->profileurl = $profileurl;
381 if ($profile->find()) {
390 * Check to see if this Twitter status has already been imported
392 * @param Profile $profile Twitter user's local profile
393 * @param string $statusUri URI of the status on Twitter
395 * @return mixed value a matching Notice or null
// Looks up a Notice matching both the status URI and the profile id by
// setting them as query fields and running find().
// NOTE(review): no caller of this method is visible in this file; saveStatus()
// detects duplicates through Notice_to_status instead - confirm it is still used.
397 function checkDupe($profile, $statusUri)
399 $notice = new Notice();
400 $notice->uri = $statusUri;
401 $notice->profile_id = $profile->id;
404 if ($notice->find()) {
// Makes sure a local Profile exists for a Twitter user. An existing profile
// only gets its avatar checked; otherwise a Profile and a Remote_profile are
// inserted between BEGIN and COMMIT, and the user's avatars are saved.
412 function ensureProfile($user)
414 // check to see if there's already a profile for this user
415 $profileurl = 'http://twitter.com/' . $user->screen_name;
416 $profile = $this->getProfileByUrl($user->screen_name, $profileurl);
418 if (!empty($profile)) {
419 common_debug($this->name() .
420 " - Profile for $profile->nickname found.");
422 // Check to see if the user's Avatar has changed
424 $this->checkAvatar($user, $profile);
428 common_debug($this->name() . ' - Adding profile and remote profile ' .
429 "for Twitter user: $profileurl.");
431 $profile = new Profile();
// Transaction is driven by issuing raw BEGIN / ROLLBACK / COMMIT statements
// through the Profile object.
432 $profile->query("BEGIN");
434 $profile->nickname = $user->screen_name;
435 $profile->fullname = $user->name;
436 $profile->homepage = $user->url;
437 $profile->bio = $user->description;
438 $profile->location = $user->location;
439 $profile->profileurl = $profileurl;
440 $profile->created = common_sql_now();
443 $id = $profile->insert();
444 } catch(Exception $e) {
// NOTE(review): $this->name is read as a property here, while every other
// log call in this class uses the method $this->name(). This looks like a
// missing "()" that leaves the daemon name out of the message - confirm.
445 common_log(LOG_WARNING, $this->name . ' Couldn\'t insert profile - ' . $e->getMessage());
449 common_log_db_error($profile, 'INSERT', __FILE__);
450 $profile->query("ROLLBACK");
454 // check for remote profile
456 $remote_pro = Remote_profile::staticGet('uri', $profileurl);
458 if (empty($remote_pro)) {
459 $remote_pro = new Remote_profile();
// The remote profile shares the id of the profile row inserted above.
461 $remote_pro->id = $id;
462 $remote_pro->uri = $profileurl;
463 $remote_pro->created = common_sql_now();
466 $rid = $remote_pro->insert();
467 } catch (Exception $e) {
468 common_log(LOG_WARNING, $this->name() . ' Couldn\'t save remote profile - ' . $e->getMessage());
// NOTE(review): this is the Remote_profile failure path, yet $profile (not
// $remote_pro) is handed to common_log_db_error - confirm which object's
// error should be reported.
472 common_log_db_error($profile, 'INSERT', __FILE__);
473 $profile->query("ROLLBACK");
478 $profile->query("COMMIT");
480 $this->saveAvatars($user, $id);
// Compares the filename derived from the Twitter user's current profile image
// with the filename of the profile's stored 48px avatar and refreshes the
// local avatars when the name changed or a local avatar file is missing.
486 function checkAvatar($twitter_user, $profile)
490 $path_parts = pathinfo($twitter_user->profile_image_url);
// Local avatar files are named "Twitter_<twitter user id>_<remote basename>".
492 $newname = 'Twitter_' . $twitter_user->id . '_' .
493 $path_parts['basename'];
// NOTE(review): getAvatar(48) is dereferenced without a check, while
// updateAvatar() treats the avatar as possibly absent ("if present"); a
// profile with no 48px avatar would fail here - confirm.
495 $oldname = $profile->getAvatar(48)->filename;
497 if ($newname != $oldname) {
498 common_debug($this->name() . ' - Avatar for Twitter user ' .
499 "$profile->nickname has changed.");
500 common_debug($this->name() . " - old: $oldname new: $newname");
502 $this->updateAvatars($twitter_user, $profile);
// Same filename, but one of the local files may have gone missing.
505 if ($this->missingAvatarFile($profile)) {
506 common_debug($this->name() . ' - Twitter user ' .
508 ' is missing one or more local avatars.');
509 common_debug($this->name() ." - old: $oldname new: $newname");
511 $this->updateAvatars($twitter_user, $profile);
/**
 * Replace the stored avatars of a profile with the Twitter user's current
 * profile image in the three Twitter sizes (mini, normal, bigger).
 *
 * For each size the avatar record is replaced first and the image file is
 * fetched afterwards.
 *
 * @param object  $twitter_user Twitter user data; profile_image_url and id are read
 * @param Profile $profile      local profile whose avatars are replaced
 *
 * @return void
 */
function updateAvatars($twitter_user, $profile) {
    $path_parts = pathinfo($twitter_user->profile_image_url);

    $ext = $path_parts['extension'];

    // Strip the trailing "_normal.<ext>" from the basename. The length is
    // derived from the real extension (as saveAvatars() does) rather than
    // assuming a three-letter one, so e.g. ".jpeg" images keep the right root.
    $suffix_length = strlen('_normal.' . $ext);
    $img_root = substr($path_parts['basename'], 0, -$suffix_length);
    $mediatype = $this->getMediatype($ext);

    foreach (array('mini', 'normal', 'bigger') as $size) {
        $url = $path_parts['dirname'] . '/' .
            $img_root . '_' . $size . ".$ext";
        $filename = 'Twitter_' . $twitter_user->id . '_' .
            $img_root . "_$size.$ext";

        $this->updateAvatar($profile->id, $size, $mediatype, $filename);
        $this->fetchAvatar($url, $filename);
    }
}
/**
 * Tell whether any of the profile's local avatar files (24, 48 and 73 px)
 * is missing.
 *
 * A size with no avatar record at all is reported as missing too, instead
 * of failing on the property access of a non-object.
 *
 * @param Profile $profile profile whose avatars are checked
 *
 * @return boolean true when at least one avatar file is absent
 */
function missingAvatarFile($profile) {
    foreach (array(24, 48, 73) as $size) {
        $avatar = $profile->getAvatar($size);

        if (empty($avatar)) {
            return true;
        }

        if (!file_exists(Avatar::path($avatar->filename))) {
            return true;
        }
    }

    return false;
}
// Maps a lower-cased file extension to the media type stored with an avatar;
// the three values assigned below are the only ones visible here.
547 function getMediatype($ext)
551 switch (strtolower($ext)) {
// NOTE(review): 'image/jpg' is not the registered media type for JPEG
// ('image/jpeg' is). Check what consumers of Avatar->mediatype expect before
// changing it, since existing rows may already hold this value.
553 $mediatype = 'image/jpg';
556 $mediatype = 'image/gif';
559 $mediatype = 'image/png';
/**
 * Fetch a Twitter user's profile image in the three Twitter sizes (mini,
 * normal, bigger) and create an avatar record for every image that could
 * be downloaded.
 *
 * @param object $user Twitter user data; profile_image_url and id are read
 * @param int    $id   id of the local profile the avatars belong to
 *
 * @return void
 */
function saveAvatars($user, $id)
{
    $path_parts = pathinfo($user->profile_image_url);
    $ext = $path_parts['extension'];

    // Strip the trailing "_normal.<ext>" from the basename to get the root
    // shared by all three size variants (+1 accounts for the dot).
    $end = strlen('_normal' . $ext);
    $img_root = substr($path_parts['basename'], 0, -($end+1));
    $mediatype = $this->getMediatype($ext);

    foreach (array('mini', 'normal', 'bigger') as $size) {
        $url = $path_parts['dirname'] . '/' .
            $img_root . '_' . $size . ".$ext";
        $filename = 'Twitter_' . $user->id . '_' .
            $img_root . "_$size.$ext";

        if ($this->fetchAvatar($url, $filename)) {
            $this->newAvatar($id, $size, $mediatype, $filename);
        } else {
            // Log with the daemon name like every other message in this
            // class; the previous code tried to call the profile id as a
            // function here, which fails instead of logging.
            common_log(LOG_WARNING, $this->name() .
                       " - Problem fetching Avatar: $url");
        }
    }
}
// Replaces one avatar of a profile: loads the profile, looks up its current
// avatar for the pixel size matching the Twitter size name, and creates the
// new avatar record through newAvatar().
590 function updateAvatar($profile_id, $size, $mediatype, $filename) {
592 common_debug($this->name() . " - Updating avatar: $size");
594 $profile = Profile::staticGet($profile_id);
596 if (empty($profile)) {
597 common_debug($this->name() . " - Couldn't get profile: $profile_id!");
// Twitter size names mapped to pixel sizes.
601 $sizes = array('mini' => 24, 'normal' => 48, 'bigger' => 73);
602 $avatar = $profile->getAvatar($sizes[$size]);
604 // Delete the avatar, if present
609 $this->newAvatar($profile->id, $size, $mediatype, $filename);
// Inserts an Avatar record for a downloaded Twitter image: sets the owning
// profile, the height for the requested size (24, 48 or 73), media type,
// filename and public URL, then logs the outcome.
612 function newAvatar($profile_id, $size, $mediatype, $filename)
616 $avatar = new Avatar();
617 $avatar->profile_id = $profile_id;
622 $avatar->height = 24;
626 $avatar->height = 48;
629 // Note: Twitter's big avatars are a different size than
630 // StatusNet's (StatusNet's = 96)
632 $avatar->height = 73;
635 $avatar->original = 0; // we don't have the original
636 $avatar->mediatype = $mediatype;
637 $avatar->filename = $filename;
// Public URL is derived from the bare filename.
638 $avatar->url = Avatar::url($filename);
640 $avatar->created = common_sql_now();
643 $id = $avatar->insert();
644 } catch (Exception $e) {
645 common_log(LOG_WARNING, $this->name() . ' Couldn\'t insert avatar - ' . $e->getMessage());
649 common_log_db_error($avatar, 'INSERT', __FILE__);
653 common_debug($this->name() .
654 " - Saved new $size avatar for $profile_id.");
660 * Fetch a remote avatar image and save to local storage.
662 * @param string $url avatar source URL
663 * @param string $filename bare local filename for download
664 * @return bool true on success, false on failure
// Downloads $url with the HTTP client and, on an OK response, writes the
// response body to the local avatar path for $filename.
666 function fetchAvatar($url, $filename)
668 common_debug($this->name() . " - Fetching Twitter avatar: $url");
670 $request = HTTPClient::start();
671 $response = $request->get($url);
672 if ($response->isOk()) {
// Avatar::path() turns the bare filename into the on-disk location.
673 $avatarfile = Avatar::path($filename);
674 $ok = file_put_contents($avatarfile, $response->getBody());
676 common_log(LOG_WARNING, $this->name() .
677 " - Couldn't open file $filename");
// Produces the rendered form of a status text. Without entity data it falls
// back to regex-based linking of URLs, hashtags and mentions; with entity
// data it replaces each entity span with a link built by the make*Link()
// helpers.
691 function linkify($status)
693 $text = $status->text;
695 if (empty($status->entities)) {
696 common_log(LOG_WARNING, "No entities data for {$status->id}; trying to fake up links ourselves.");
697 $text = common_replace_urls_callback($text, 'common_linkify');
// NOTE(review): both patterns below rely on the /e (eval) modifier, which
// was deprecated in PHP 5.5 and removed in PHP 7.0; they need porting to
// preg_replace_callback() to run on current PHP.
698 $text = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.TwitterStatusFetcher::tagLink('\\2')", $text);
699 $text = preg_replace('/(^|\s+)@([a-z0-9A-Z_]{1,64})/e', "'\\1@'.TwitterStatusFetcher::atLink('\\2')", $text);
703 // Move all the entities into order so we can
704 // replace them in reverse order and thus
705 // not mess up their indices
// Entities are keyed by their start index, so two entities starting at the
// same index would overwrite each other.
707 $toReplace = array();
709 if (!empty($status->entities->urls)) {
710 foreach ($status->entities->urls as $url) {
711 $toReplace[$url->indices[0]] = array(self::URL, $url);
715 if (!empty($status->entities->hashtags)) {
716 foreach ($status->entities->hashtags as $hashtag) {
717 $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag);
721 if (!empty($status->entities->user_mentions)) {
722 foreach ($status->entities->user_mentions as $mention) {
723 $toReplace[$mention->indices[0]] = array(self::MENTION, $mention);
727 // sort in reverse order by key
731 foreach ($toReplace as $part) {
732 list($type, $object) = $part;
735 $linkText = $this->makeUrlLink($object);
738 $linkText = $this->makeHashtagLink($object);
741 $linkText = $this->makeMentionLink($object);
// Splice the link in place of the characters between the entity's start and
// end indices (character offsets, hence mb_substr).
746 $text = mb_substr($text, 0, $object->indices[0]) . $linkText . mb_substr($text, $object->indices[1]);
/**
 * Build the HTML anchor for a URL entity of a status.
 *
 * The URL comes from remote status data and ends up inside a single-quoted
 * attribute and as link text, so it is HTML-escaped (quotes included)
 * before being embedded.
 *
 * @param object $object URL entity; its url property is read
 *
 * @return string HTML anchor element for the URL
 */
function makeUrlLink($object)
{
    $url = htmlspecialchars($object->url, ENT_QUOTES, 'UTF-8');

    return "<a href='{$url}' class='extlink'>{$url}</a>";
}
/**
 * Build the rendered form of a hashtag entity: a literal '#' followed by
 * the anchor produced by tagLink() for the entity's text.
 *
 * @param object $object hashtag entity; its text property is read
 *
 * @return string '#' plus the HTML anchor
 */
function makeHashtagLink($object)
{
    $anchor = self::tagLink($object->text);

    return '#' . $anchor;
}
/**
 * Build the rendered form of a user-mention entity: a literal '@' followed
 * by the anchor produced by atLink() for the mentioned account.
 *
 * @param object $object mention entity; screen_name and name are read
 *
 * @return string '@' plus the HTML anchor
 */
function makeMentionLink($object)
{
    $anchor = self::atLink($object->screen_name, $object->name);

    return '@' . $anchor;
}
/**
 * Build the HTML anchor for a hashtag, pointing at Twitter's search page.
 *
 * The tag originates from remote status text, so it is URL-encoded for the
 * query string and HTML-escaped for the link text before being embedded.
 *
 * @param string $tag hashtag text without the leading '#'
 *
 * @return string HTML anchor element for the tag
 */
static function tagLink($tag)
{
    $query = urlencode($tag);
    $label = htmlspecialchars($tag, ENT_QUOTES, 'UTF-8');

    return "<a href='https://twitter.com/search?q=%23{$query}' class='hashtag'>{$label}</a>";
}
/**
 * Build the HTML anchor for a mentioned Twitter account.
 *
 * Both values originate from remote data and are embedded in single-quoted
 * attributes, so the screen name is URL-encoded for the path and
 * HTML-escaped for the link text, and the full name is HTML-escaped
 * (quotes included) for the title attribute.
 *
 * @param string $screenName Twitter screen name without the leading '@'
 * @param string $fullName   optional display name, used as the link title
 *
 * @return string HTML anchor element for the account
 */
static function atLink($screenName, $fullName=null)
{
    $path  = rawurlencode($screenName);
    $label = htmlspecialchars($screenName, ENT_QUOTES, 'UTF-8');

    if (!empty($fullName)) {
        $title = htmlspecialchars($fullName, ENT_QUOTES, 'UTF-8');

        return "<a href='http://twitter.com/{$path}' title='{$title}'>{$label}</a>";
    }

    return "<a href='http://twitter.com/{$path}'>{$label}</a>";
}
// Records a Reply for every user mentioned in the status whose Twitter id is
// linked to a local account, so the notice counts as addressed to them.
780 function saveStatusMentions($notice, $status)
// Nothing to do when the status carries no mention entities.
784 if (empty($status->entities) || empty($status->entities->user_mentions)) {
788 foreach ($status->entities->user_mentions as $mention) {
// Map the mentioned Twitter id to a local user through its Foreign_link.
789 $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE);
790 if (!empty($flink)) {
791 $user = User::staticGet('id', $flink->user_id);
793 $reply = new Reply();
794 $reply->notice_id = $notice->id;
795 $reply->profile_id = $user->id;
796 common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}");
797 $id = $reply->insert();
// Script entry: pick the daemon id from -i, --id or the first positional
// argument, read the debug flag, and construct the fetcher.
807 if (have_option('i')) {
808 $id = get_option_value('i');
// NOTE(review): the long option is named with leading dashes here ('--id')
// but without them in the debug check below ('debug'); only one of the two
// forms can match what have_option() expects - confirm and make consistent.
809 } else if (have_option('--id')) {
810 $id = get_option_value('--id');
811 } else if (count($args) > 0) {
817 if (have_option('d') || have_option('debug')) {
// NOTE(review): interval and child count are hard-coded as 60 and 2 although
// POLL_INTERVAL and MAXCHILDREN are defined at the top of the script for
// exactly this purpose - consider passing the constants.
821 $fetcher = new TwitterStatusFetcher($id, 60, 2, $debug);