From 3b7ee5a5f9a2cd3066aff8a7a12d09878be3b06c Mon Sep 17 00:00:00 2001 From: Robin Millette Date: Wed, 13 May 2009 14:27:32 -0400 Subject: [PATCH] rewrote short url stuff to handle new file/url classes (redirections, oembed, mimetypes, etc.) --- actions/newnotice.php | 18 +-- classes/File.php | 65 ++++++++++ classes/File_oembed.php | 43 +++++++ classes/File_redirection.php | 231 +++++++++++++++++++++++++++++++++++ classes/File_to_post.php | 17 +++ classes/Notice.php | 6 +- classes/laconica.ini | 64 ---------- classes/laconica.links.ini | 59 +-------- lib/Shorturl_api.php | 3 +- lib/util.php | 112 +++-------------- 10 files changed, 382 insertions(+), 236 deletions(-) diff --git a/actions/newnotice.php b/actions/newnotice.php index 8b03abc62c..ae0ff96363 100644 --- a/actions/newnotice.php +++ b/actions/newnotice.php @@ -158,7 +158,8 @@ class NewnoticeAction extends Action $replyto = 'false'; } - $notice = Notice::saveNew($user->id, $content, 'web', 1, +// $notice = Notice::saveNew($user->id, $content_shortened, 'web', 1, + $notice = Notice::saveNew($user->id, $content_shortened, 'web', 1, ($replyto == 'false') ? null : $replyto); if (is_string($notice)) { @@ -203,25 +204,12 @@ class NewnoticeAction extends Action * @return void */ function saveUrls($notice) { - common_debug("Saving all URLs"); common_replace_urls_callback($notice->content, array($this, 'saveUrl'), $notice->id); } function saveUrl($data) { list($url, $notice_id) = $data; - common_debug("Saving $url for $notice_id"); - $file = File::staticGet('url', $url); - if (empty($file)) { - common_debug('unknown file/url'); - $file = new File; - $file->url = $url; - $file->insert(); - } - common_debug('File: ' . print_r($file, true)); - $f2p = new File_to_post; - $f2p->file_id = $file->id; - $f2p->post_id = $notice_id; - $f2p->insert(); + $zzz = File::processNew($url, $notice_id); } /** diff --git a/classes/File.php b/classes/File.php index 8dd017b79b..2ddc5deb8b 100644 --- a/classes/File.php +++ b/classes/File.php @@ -20,6 +20,11 @@ if (!defined('LACONICA')) { exit(1); } require_once INSTALLDIR.'/classes/Memcached_DataObject.php'; +require_once INSTALLDIR.'/classes/File_redirection.php'; +require_once INSTALLDIR.'/classes/File_oembed.php'; +require_once INSTALLDIR.'/classes/File_thumbnail.php'; +require_once INSTALLDIR.'/classes/File_to_post.php'; +//require_once INSTALLDIR.'/classes/File_redirection.php'; /** * Table Definition for file @@ -44,4 +49,64 @@ class File extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + function isProtected($url) { + return 'http://www.facebook.com/login.php' === $url; + } + + function saveNew($redir_data, $given_url) { + $x = new File; + $x->url = $given_url; + if (!empty($redir_data['protected'])) $x->protected = $redir_data['protected']; + if (!empty($redir_data['title'])) $x->title = $redir_data['title']; + if (!empty($redir_data['type'])) $x->mimetype = $redir_data['type']; + if (!empty($redir_data['size'])) $x->size = intval($redir_data['size']); + if (isset($redir_data['time']) && $redir_data['time'] > 0) $x->date = intval($redir_data['time']); + $file_id = $x->insert(); + + if (isset($redir_data['type']) + && ('text/html' === substr($redir_data['type'], 0, 9)) + && ($oembed_data = File_oembed::_getOembed($given_url)) + && isset($oembed_data['json'])) { + + File_oembed::saveNew($oembed_data['json'], $file_id); + } + return $x; + } + + function processNew($given_url, $notice_id) { + if (empty($given_url)) return -1; // error, no url to process + $given_url = File_redirection::_canonUrl($given_url); + if (empty($given_url)) return -1; // error, no url to process + $file = File::staticGet('url', $given_url); + if (empty($file->id)) { + $file_redir = File_redirection::staticGet('url', $given_url); + if (empty($file_redir->id)) { + $redir_data = File_redirection::where($given_url); + $redir_url = $redir_data['url']; + if ($redir_url === $given_url) { + $x = File::saveNew($redir_data, $given_url); + $file_id = $x->id; + + } else { + $x = File::processNew($redir_url, $notice_id); + $file_id = $x->id; + File_redirection::saveNew($redir_data, $file_id, $given_url); + } + } else { + $file_id = $file_redir->file_id; + } + } else { + $file_id = $file->id; + $x = $file; + } + + if (empty($x)) { + $x = File::staticGet($file_id); + if (empty($x)) die('Impossible!'); + } + + File_to_post::processNew($file_id, $notice_id); + return $x; + } } diff --git a/classes/File_oembed.php b/classes/File_oembed.php index 33dd8200c2..2846f49dbc 100644 --- a/classes/File_oembed.php +++ b/classes/File_oembed.php @@ -50,4 +50,47 @@ class File_oembed extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + + function _getOembed($url, $maxwidth = 500, $maxheight = 400, $format = 'json') { + $cmd = 'http://oohembed.com/oohembed/?url=' . urlencode($url); + if (is_int($maxwidth)) $cmd .= "&maxwidth=$maxwidth"; + if (is_int($maxheight)) $cmd .= "&maxheight=$maxheight"; + if (is_string($format)) $cmd .= "&format=$format"; + $oe = @file_get_contents($cmd); + if (false === $oe) return false; + return array($format => (('json' === $format) ? json_decode($oe, true) : $oe)); + } + + function saveNew($data, $file_id) { + $file_oembed = new File_oembed; + $file_oembed->file_id = $file_id; + $file_oembed->version = $data['version']; + $file_oembed->type = $data['type']; + if (!empty($data['provider_name'])) $file_oembed->provider = $data['provider_name']; + if (!isset($file_oembed->provider) && !empty($data['provide'])) $file_oembed->provider = $data['provider']; + if (!empty($data['provide_url'])) $file_oembed->provider_url = $data['provider_url']; + if (!empty($data['width'])) $file_oembed->width = intval($data['width']); + if (!empty($data['height'])) $file_oembed->height = intval($data['height']); + if (!empty($data['html'])) $file_oembed->html = $data['html']; + if (!empty($data['title'])) $file_oembed->title = $data['title']; + if (!empty($data['author_name'])) $file_oembed->author_name = $data['author_name']; + if (!empty($data['author_url'])) $file_oembed->author_url = $data['author_url']; + if (!empty($data['url'])) $file_oembed->url = $data['url']; + $file_oembed->insert(); + + if (!empty($data['thumbnail_url'])) { + $tn = new File_thumbnail; + $tn->file_id = $file_id; + $tn->url = $data['thumbnail_url']; + $tn->width = intval($data['thumbnail_width']); + $tn->height = intval($data['thumbnail_height']); + $tn->insert(); + } + + + + } } + + diff --git a/classes/File_redirection.php b/classes/File_redirection.php index e2d1e69c38..a71d1c0831 100644 --- a/classes/File_redirection.php +++ b/classes/File_redirection.php @@ -20,6 +20,11 @@ if (!defined('LACONICA')) { exit(1); } require_once INSTALLDIR.'/classes/Memcached_DataObject.php'; +require_once INSTALLDIR.'/classes/File.php'; +require_once INSTALLDIR.'/classes/File_oembed.php'; + +define('USER_AGENT', 'Laconica user agent / file probe'); + /** * Table Definition for file_redirection @@ -42,4 +47,230 @@ class File_redirection extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + + + function _commonCurl($url, $redirs) { + $curlh = curl_init(); + curl_setopt($curlh, CURLOPT_URL, $url); + curl_setopt($curlh, CURLOPT_AUTOREFERER, true); // # setup referer header when folowing redirects + curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 10); // # seconds to wait + curl_setopt($curlh, CURLOPT_MAXREDIRS, $redirs); // # max number of http redirections to follow + curl_setopt($curlh, CURLOPT_USERAGENT, USER_AGENT); + curl_setopt($curlh, CURLOPT_FOLLOWLOCATION, true); // Follow redirects + curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlh, CURLOPT_FILETIME, true); + curl_setopt($curlh, CURLOPT_HEADER, true); // Include header in output + return $curlh; + } + + function _redirectWhere_imp($short_url, $redirs = 10, $protected = false) { + if ($redirs < 0) return false; + + // let's see if we know this... + $a = File::staticGet('url', $short_url); + if (empty($a->id)) { + $b = File_redirection::staticGet('url', $short_url); + if (empty($b->id)) { + // we'll have to figure it out + } else { + // this is a redirect to $b->file_id + $a = File::staticGet($b->file_id); + $url = $a->url; + } + } else { + // this is a direct link to $a->url + $url = $a->url; + } + if (isset($url)) { + return $url; + } + + + + $curlh = File_redirection::_commonCurl($short_url, $redirs); + // Don't include body in output + curl_setopt($curlh, CURLOPT_NOBODY, true); + curl_exec($curlh); + $info = curl_getinfo($curlh); + curl_close($curlh); + + if (405 == $info['http_code']) { + $curlh = File_redirection::_commonCurl($short_url, $redirs); + curl_exec($curlh); + $info = curl_getinfo($curlh); + curl_close($curlh); + } + + if (!empty($info['redirect_count']) && File::isProtected($info['url'])) { + return File_redirection::_redirectWhere_imp($short_url, $info['redirect_count'] - 1, true); + } + + $ret = array('code' => $info['http_code'] + , 'redirects' => $info['redirect_count'] + , 'url' => $info['url']); + + if (!empty($info['content_type'])) $ret['type'] = $info['content_type']; + if ($protected) $ret['protected'] = true; + if (!empty($info['download_content_length'])) $ret['size'] = $info['download_content_length']; + if (isset($info['filetime']) && ($info['filetime'] > 0)) $ret['time'] = $info['filetime']; + return $ret; + } + + function where($in_url) { + $ret = File_redirection::_redirectWhere_imp($in_url); + return $ret; + } + + function makeShort($long_url) { + $long_url = File_redirection::_canonUrl($long_url); + // do we already know this long_url and have a short redirection for it? + $file = new File; + $file_redir = new File_redirection; + $file->url = $long_url; + $file->joinAdd($file_redir); + $file->selectAdd('length(file_redirection.url) as len'); + $file->limit(1); + $file->orderBy('len'); + $file->find(true); + if (!empty($file->id)) { + return $file->url; + } + + // if yet unknown, we must find a short url according to user settings + $short_url = File_redirection::_userMakeShort($long_url, common_current_user()); + return $short_url; + } + + function _userMakeShort($long_url, $user) { + if (empty($user)) { + // common current user does not find a user when called from the XMPP daemon + // therefore we'll set one here fix, so that XMPP given URLs may be shortened + $user->urlshorteningservice = 'ur1.ca'; + } + $curlh = curl_init(); + curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait + curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica'); + curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true); + + switch($user->urlshorteningservice) { + case 'ur1.ca': + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url_service = new LilUrl; + $short_url = $short_url_service->shorten($long_url); + break; + + case '2tu.us': + $short_url_service = new TightUrl; + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url = $short_url_service->shorten($long_url); + break; + + case 'ptiturl.com': + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url_service = new PtitUrl; + $short_url = $short_url_service->shorten($long_url); + break; + + case 'bit.ly': + curl_setopt($curlh, CURLOPT_URL, 'http://bit.ly/api?method=shorten&long_url='.urlencode($long_url)); + $short_url = current(json_decode(curl_exec($curlh))->results)->hashUrl; + break; + + case 'is.gd': + curl_setopt($curlh, CURLOPT_URL, 'http://is.gd/api.php?longurl='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'snipr.com': + curl_setopt($curlh, CURLOPT_URL, 'http://snipr.com/site/snip?r=simple&link='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'metamark.net': + curl_setopt($curlh, CURLOPT_URL, 'http://metamark.net/api/rest/simple?long_url='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'tinyurl.com': + curl_setopt($curlh, CURLOPT_URL, 'http://tinyurl.com/api-create.php?url='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + default: + $short_url = false; + } + + curl_close($curlh); + + if ($short_url) { + $short_url = (string)$short_url; +if(1) { + // store it + $file = File::staticGet('url', $long_url); + if (empty($file)) { + $redir_data = File_redirection::where($long_url); + $file = File::saveNew($redir_data, $long_url); + $file_id = $file->id; + if (!empty($redir_data['oembed']['json'])) { + File_oembed::saveNew($redir_data['oembed']['json'], $file_id); + } + } else { + $file_id = $file->id; + } + $file_redir = File_redirection::staticGet('url', $short_url); + if (empty($file_redir)) { + $file_redir = new File_redirection; + $file_redir->url = $short_url; + $file_redir->file_id = $file_id; + $file_redir->insert(); + } } + return $short_url; + } + return $long_url; + } + + function _canonUrl($in_url, $default_scheme = 'http://') { + if (empty($in_url)) return false; + $out_url = $in_url; + $p = parse_url($out_url); + if (empty($p['host']) || empty($p['scheme'])) { + list($scheme) = explode(':', $in_url, 2); + switch ($scheme) { + case 'fax': + case 'tel': + $out_url = str_replace('.-()', '', $out_url); + break; + + case 'mailto': + case 'aim': + case 'jabber': + case 'xmpp': + // don't touch anything + break; + + default: + $out_url = $default_scheme . ltrim($out_url, '/'); + $p = parse_url($out_url); + if (empty($p['scheme'])) return false; + break; + } + } + + if (('ftp' == $p['scheme']) || ('http' == $p['scheme']) || ('https' == $p['scheme'])) { + if (empty($p['host'])) return false; + if (empty($p['path'])) { + $out_url .= '/'; + } + } + + return $out_url; + } + + function saveNew($data, $file_id, $url) { + $file_redir = new File_redirection; + $file_redir->url = $url; + $file_redir->file_id = $file_id; + $file_redir->redirections = intval($data['redirects']); + $file_redir->httpcode = intval($data['code']); + $file_redir->insert(); + } +} + diff --git a/classes/File_to_post.php b/classes/File_to_post.php index bd0528d98f..00ddebe6b8 100644 --- a/classes/File_to_post.php +++ b/classes/File_to_post.php @@ -40,4 +40,21 @@ class File_to_post extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + function processNew($file_id, $notice_id) { + static $seen = array(); + if (empty($seen[$notice_id]) || !in_array($file_id, $seen[$notice_id])) { + $f2p = new File_to_post; + $f2p->file_id = $file_id; + $f2p->post_id = $notice_id; + $f2p->insert(); + if (empty($seen[$notice_id])) { + $seen[$notice_id] = array($file_id); + } else { + $seen[$notice_id][] = $file_id; + } + } + + } } + diff --git a/classes/Notice.php b/classes/Notice.php index 382d160ab3..c2fa2d19e5 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -124,7 +124,7 @@ class Notice extends Memcached_DataObject $profile = Profile::staticGet($profile_id); - $final = common_shorten_links($content); +// $final = common_shorten_links($content); if (!$profile) { common_log(LOG_ERR, 'Problem saving notice. Unknown user.'); @@ -167,8 +167,8 @@ class Notice extends Memcached_DataObject $notice->reply_to = $reply_to; $notice->created = common_sql_now(); - $notice->content = $final; - $notice->rendered = common_render_content($final, $notice); + $notice->content = $content; + $notice->rendered = common_render_content($content, $notice); $notice->source = $source; $notice->uri = $uri; diff --git a/classes/laconica.ini b/classes/laconica.ini index 00a1b89365..316923af02 100644 --- a/classes/laconica.ini +++ b/classes/laconica.ini @@ -1,67 +1,3 @@ - -[file] -id = 129 -url = 2 -mimetype = 2 -size = 1 -title = 2 -date = 1 -protected = 17 - -[file__keys] -id = K -url = U - -[file_oembed] -id = 129 -file_id = 1 -version = 2 -type = 2 -provider = 2 -provider_url = 2 -width = 1 -height = 1 -html = 66 -title = 2 -author_name = 2 -author_url = 2 -url = 2 - -[file_oembed__keys] -id = K -file_id = U - -[file_redirection] -id = 129 -url = 2 -file_id = 1 -redirections = 1 -httpcode = 1 - -[file_redirection__keys] -id = K -url = U - -[file_thumbnail] -id = 129 -file_id = 1 -url = 2 -width = 1 -height = 1 - -[file_thumbnail__keys] -id = K -file_id = U -url = U - -[file_to_post] -id = 129 -file_id = 1 -post_id = 1 - -[file_to_post__keys] -id = K - [avatar] profile_id = 129 original = 17 diff --git a/classes/laconica.links.ini b/classes/laconica.links.ini index bc52ce578e..95c63f3c09 100644 --- a/classes/laconica.links.ini +++ b/classes/laconica.links.ini @@ -53,62 +53,5 @@ file_id = file:id [file_to_post] file_id = file:id -post_id = post:id - -[avatar] -profile_id = profile:id - -[user] -id = profile:id -carrier = sms_carrier:id - -[remote_profile] -id = profile:id - -[notice] -profile_id = profile:id -reply_to = notice:id - -[reply] -notice_id = notice:id -profile_id = profile:id - -[token] -consumer_key = consumer:consumer_key - -[nonce] -consumer_key,token = token:consumer_key,token - -[user_openid] -user_id = user:id - -[confirm_address] -user_id = user:id - -[remember_me] -user_id = user:id - -[queue_item] -notice_id = notice:id - -[subscription] -subscriber = profile:id -subscribed = profile:id - -[fave] -notice_id = notice:id -user_id = user:id - -[file_oembed] -file_id = file:id - -[file_redirection] -file_id = file:id - -[file_thumbnail] -file_id = file:id - -[file_to_post] -file_id = file:id -post_id = post:id +post_id = notice:id diff --git a/lib/Shorturl_api.php b/lib/Shorturl_api.php index fe106cb837..924aa93a89 100644 --- a/lib/Shorturl_api.php +++ b/lib/Shorturl_api.php @@ -22,6 +22,7 @@ if (!defined('LACONICA')) { exit(1); } class ShortUrlApi { protected $service_url; + protected $long_limit = 27; function __construct($service_url) { @@ -39,7 +40,7 @@ class ShortUrlApi } private function is_long($url) { - return strlen($url) >= 30; + return strlen($url) >= $this->long_limit; } protected function http_post($data) { diff --git a/lib/util.php b/lib/util.php index c4a63a4412..25c0fb0a15 100644 --- a/lib/util.php +++ b/lib/util.php @@ -466,10 +466,10 @@ function common_replace_urls_callback($text, $callback, $notice_id = null) { $url = (mb_strpos($orig_url, htmlspecialchars($url)) === FALSE) ? $url:htmlspecialchars($url); // Call user specified func - if (isset($notice_id)) { - $modified_url = call_user_func($callback, array($url, $notice_id)); - } else { + if (empty($notice_id)) { $modified_url = call_user_func($callback, $url); + } else { + $modified_url = call_user_func($callback, array($url, $notice_id)); } // Replace it! @@ -485,107 +485,29 @@ function common_linkify($url) { // It comes in special'd, so we unspecial it before passing to the stringifying // functions $url = htmlspecialchars_decode($url); - $display = $url; - $url = (!preg_match('#^([a-z]+://|(mailto|aim|tel):)#i', $url)) ? 'http://'.$url : $url; - - $attrs = array('href' => $url, 'rel' => 'external'); + $display = File_redirection::_canonUrl($url); + $longurl_data = File_redirection::where($url); + if (is_array($longurl_data)) { + $longurl = $longurl_data['url']; + } elseif (is_string($longurl_data)) { + $longurl = $longurl_data; + } else { + die('impossible to linkify'); + } - if ($longurl = common_longurl($url)) { + $attrs = array('href' => $longurl, 'rel' => 'external'); +if(0){ + if ($longurl !== $url) { $attrs['title'] = $longurl; } - - return XMLStringer::estring('a', $attrs, $display); -} - -function common_longurl($short_url) -{ - $long_url = common_shorten_link($short_url, true); - if ($long_url === $short_url) return false; - return $long_url; } - -function common_longurl2($uri) -{ - $uri_e = urlencode($uri); - $longurl = unserialize(file_get_contents("http://api.longurl.org/v1/expand?format=php&url=$uri_e")); - if (empty($longurl['long_url']) || $uri === $longurl['long_url']) return false; - return stripslashes($longurl['long_url']); + return XMLStringer::estring('a', $attrs, $display); } function common_shorten_links($text) { if (mb_strlen($text) <= 140) return $text; - static $cache = array(); - if (isset($cache[$text])) return $cache[$text]; - // \s = not a horizontal whitespace character (since PHP 5.2.4) - return $cache[$text] = common_replace_urls_callback($text, 'common_shorten_link');; -} - -function common_shorten_link($url, $reverse = false) -{ - - static $url_cache = array(); - if ($reverse) return isset($url_cache[$url]) ? $url_cache[$url] : $url; - - $user = common_current_user(); - if (!isset($user)) { - // common current user does not find a user when called from the XMPP daemon - // therefore we'll set one here fix, so that XMPP given URLs may be shortened - $user->urlshorteningservice = 'ur1.ca'; - } - $curlh = curl_init(); - curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait - curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica'); - curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true); - - switch($user->urlshorteningservice) { - case 'ur1.ca': - $short_url_service = new LilUrl; - $short_url = $short_url_service->shorten($url); - break; - - case '2tu.us': - $short_url_service = new TightUrl; - $short_url = $short_url_service->shorten($url); - break; - - case 'ptiturl.com': - $short_url_service = new PtitUrl; - $short_url = $short_url_service->shorten($url); - break; - - case 'bit.ly': - curl_setopt($curlh, CURLOPT_URL, 'http://bit.ly/api?method=shorten&long_url='.urlencode($url)); - $short_url = current(json_decode(curl_exec($curlh))->results)->hashUrl; - break; - - case 'is.gd': - curl_setopt($curlh, CURLOPT_URL, 'http://is.gd/api.php?longurl='.urlencode($url)); - $short_url = curl_exec($curlh); - break; - case 'snipr.com': - curl_setopt($curlh, CURLOPT_URL, 'http://snipr.com/site/snip?r=simple&link='.urlencode($url)); - $short_url = curl_exec($curlh); - break; - case 'metamark.net': - curl_setopt($curlh, CURLOPT_URL, 'http://metamark.net/api/rest/simple?long_url='.urlencode($url)); - $short_url = curl_exec($curlh); - break; - case 'tinyurl.com': - curl_setopt($curlh, CURLOPT_URL, 'http://tinyurl.com/api-create.php?url='.urlencode($url)); - $short_url = curl_exec($curlh); - break; - default: - $short_url = false; - } - - curl_close($curlh); - - if ($short_url) { - $url_cache[(string)$short_url] = $url; - return (string)$short_url; - } - return $url; + return common_replace_urls_callback($text, array('File_redirection', 'makeShort')); } function common_xml_safe_str($str) -- 2.39.5