]> git.mxchange.org Git - quix0rs-gnu-social.git/blobdiff - lib/util.php
Merge branch '0.8.x' of git://gitorious.org/~brion/statusnet/brion-fixes into 0.8.x
[quix0rs-gnu-social.git] / lib / util.php
index 748c8332f09839cedfcd9a83adbb0403764f55c9..b831859e993392d14de493f3ae349d9e61e919e2 100644 (file)
@@ -1,7 +1,7 @@
 <?php
 /*
- * Laconica - a distributed open-source microblogging tool
- * Copyright (C) 2008, 2009, Control Yourself, Inc.
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2008, 2009, StatusNet, Inc.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Affero General Public License as published by
@@ -54,12 +54,12 @@ function common_init_language()
     $language = common_language();
     // So we don't have to make people install the gettext locales
     $locale_set = common_init_locale($language);
-    bindtextdomain("laconica", common_config('site','locale_path'));
-    bind_textdomain_codeset("laconica", "UTF-8");
-    textdomain("laconica");
+    bindtextdomain("statusnet", common_config('site','locale_path'));
+    bind_textdomain_codeset("statusnet", "UTF-8");
+    textdomain("statusnet");
     setlocale(LC_CTYPE, 'C');
     if(!$locale_set) {
-        common_log(LOG_INFO,'Language requested:'.$language.' - locale could not be set:',__FILE__);
+        common_log(LOG_INFO, 'Language requested:' . $language . ' - locale could not be set. Perhaps that system locale is not installed.', __FILE__);
     }
 }
 
@@ -404,7 +404,7 @@ function common_render_text($text)
 
     $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r);
     $r = common_replace_urls_callback($r, 'common_linkify');
-    $r = preg_replace('/(^|\(|\[|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r);
+    $r = preg_replace('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r);
     // XXX: machine tags
     return $r;
 }
@@ -412,57 +412,107 @@ function common_render_text($text)
 function common_replace_urls_callback($text, $callback, $notice_id = null) {
     // Start off with a regex
     $regex = '#'.
-    '(?:^|\s+)('.
+    '(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
+    '('.
         '(?:'.
-            '(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'.
-            '|'.
-            '(?:mailto|aim|tel|xmpp):'.
-        ')?'.
-        '(?:'.
-        '(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
-        '|(?:'.
-            '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,6}|'. //IPv6
-            '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,5}|'.
-            '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,4}|'.
-            '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,3}|'.
-            '(?:[0-9a-f]{1,4}:){1,5}(?::[0-9a-f]{1,4}){1,2}|'.
-            '(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,1}|'.
-            '(?:(?:[0-9a-f]{1,4}:){1,7}|:):|'.
-            ':(?::[0-9a-f]{1,4}){1,7}|'.
-            '(?:(?:(?:[0-9a-f]{1,4}:){6})(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'.
-            '(?:(?:[0-9a-f]{1,4}:){5}[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})|'.
-            '(?:[0-9a-f]{1,4}:){5}:[0-9a-f]{1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            '(?:[0-9a-f]{1,4}:){1,1}(?::[0-9a-f]{1,4}){1,4}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            '(?:[0-9a-f]{1,4}:){1,2}(?::[0-9a-f]{1,4}){1,3}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            '(?:[0-9a-f]{1,4}:){1,3}(?::[0-9a-f]{1,4}){1,2}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            '(?:[0-9a-f]{1,4}:){1,4}(?::[0-9a-f]{1,4}){1,1}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            '(?:(?:[0-9a-f]{1,4}:){1,5}|:):(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}|'.
-            ':(?::[0-9a-f]{1,4}){1,5}:(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}'.
-        ')|'.
-        '(?:[^.\s/:]+\.)+'. //DNS
-        '(?:museum|travel|onion|[a-z]{2,4})'.
+            '(?:'. //Known protocols
+                '(?:'.
+                    '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://)'.
+                    '|'.
+                    '(?:(?:mailto|aim|tel|xmpp):)'.
+                ')'.
+                '(?:[\pN\pL\-\_\+\%\~]+(?::[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
+                '(?:'.
+                    '(?:'.
+                        '\[[\pN\pL\-\_\:\.]+(?<![\.\:])\]'. //[dns]
+                    ')|(?:'.
+                        '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
+                    ')'.
+                ')'.
+            ')'.
+            '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
+            '|(?:'. //IPv6
+                '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
+            ')|(?:'. //DNS
+                '(?:[\pN\pL\-\_\+\%\~]+(?:\:[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
+                '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'.
+                //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion
+                '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'.
+            ')(?![\pN\pL\-\_])'.
         ')'.
-        '(?:[:/][^\s]*)?'.
+        '(?:'.
+            '(?:\:\d+)?'. //:port
+            '(?:/[\pN\pL$\[\]\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\"@]*)?'. // /path
+            '(?:\?[\pN\pL\$\[\]\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\"@\/]*)?'. // ?query string
+            '(?:\#[\pN\pL$\[\]\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\"\@/\?\#]*)?'. // #fragment
+        ')(?<![\?\.\,\#\,])'.
     ')'.
-    '#ix';
-    preg_match_all($regex, $text, $matches);
-    // Then clean up what the regex left behind
-    $offset = 0;
-    foreach($matches[1] as $url) {
-        // Call user specified func
-        if (empty($notice_id)) {
-            $modified_url = call_user_func($callback, $url);
-        } else {
-            $modified_url = call_user_func($callback, array($url, $notice_id));
+    '#ixu';
+    //preg_match_all($regex,$text,$matches);
+    //print_r($matches);
+    return preg_replace_callback($regex, curry('callback_helper',$callback,$notice_id) ,$text);
+}
+
+function callback_helper($matches, $callback, $notice_id) {
+    $url=$matches[1];
+    $left = strpos($matches[0],$url);
+    $right = $left+strlen($url);
+
+    $groupSymbolSets=array(
+        array(
+            'left'=>'(',
+            'right'=>')'
+        ),
+        array(
+            'left'=>'[',
+            'right'=>']'
+        ),
+        array(
+            'left'=>'{',
+            'right'=>'}'
+        )
+    );
+    $cannotEndWith=array('.','?',',','#');
+    $original_url=$url;
+    do{
+        $original_url=$url;
+        foreach($groupSymbolSets as $groupSymbolSet){
+            if(substr($url,-1)==$groupSymbolSet['right']){
+                $group_left_count = substr_count($url,$groupSymbolSet['left']);
+                $group_right_count = substr_count($url,$groupSymbolSet['right']);
+                if($group_left_count<$group_right_count){
+                    $right-=1;
+                    $url=substr($url,0,-1);
+                }
+            }
         }
+        if(in_array(substr($url,-1),$cannotEndWith)){
+            $right-=1;
+            $url=substr($url,0,-1);
+        }
+    }while($original_url!=$url);
 
-        // Replace it!
-        $start = mb_strpos($text, $url, $offset);
-        $text = mb_substr($text, 0, $start).$modified_url.mb_substr($text, $start + mb_strlen($url), mb_strlen($text));
-        $offset = $start + mb_strlen($modified_url);
+    if(empty($notice_id)){
+        $result = call_user_func_array($callback,$url);
+    }else{
+        $result = call_user_func_array($callback, array(array($url,$notice_id)) );
     }
+    return substr($matches[0],0,$left) . $result . substr($matches[0],$right);
+}
 
-    return $text;
+function curry($fn) {
+    //TODO switch to a PHP 5.3 function closure based approach if PHP 5.3 is used
+    $args = func_get_args();
+    array_shift($args);
+    $id = uniqid('_partial');
+    $GLOBALS[$id] = array($fn, $args);
+    return create_function('',
+                           '$args = func_get_args(); '.
+                           'return call_user_func_array('.
+                           '$GLOBALS["'.$id.'"][0],'.
+                           'array_merge('.
+                           '$args,'.
+                           '$GLOBALS["'.$id.'"][1]));');
 }
 
 function common_linkify($url) {
@@ -470,6 +520,11 @@ function common_linkify($url) {
     // functions
     $url = htmlspecialchars_decode($url);
 
+   if(strpos($url, '@') !== false && strpos($url, ':') === false) {
+       //url is an email address without the mailto: protocol
+       return XMLStringer::estring('a', array('href' => "mailto:$url", 'rel' => 'external'), $url);
+   }
+
     $canon = File_redirection::_canonUrl($url);
 
     $longurl_data = File_redirection::where($url);
@@ -487,42 +542,31 @@ function common_linkify($url) {
     $attachment_id = null;
     $has_thumb = false;
 
-    // Check to see whether there's a filename associated with this URL.
-    // If there is, it's an upload and qualifies as an attachment
+    // Check to see whether this is a known "attachment" URL.
 
-    $localfile = File::staticGet('url', $longurl);
+    $f = File::staticGet('url', $longurl);
 
-    if (!empty($localfile)) {
-        if (isset($localfile->filename)) {
-            $is_attachment = true;
-            $attachment_id = $localfile->id;
-        }
+    if (empty($f)) {
+        // XXX: this writes to the database. :<
+        $f = File::processNew($longurl);
     }
 
-    // if this URL is an attachment, then we set class='attachment' and id='attahcment-ID'
-    // where ID is the id of the attachment for the given URL.
-    //
-    // we need a better test telling what can be shown as an attachment
-    // we're currently picking up oembeds only.
-    // I think the best option is another file_view table in the db
-    // and associated dbobject.
-
-    $query = "select file_oembed.file_id as file_id from file join file_oembed on file.id = file_oembed.file_id where file.url='$longurl'";
-    $file = new File;
-    $file->query($query);
-    $file->fetch();
-
-    if (!empty($file->file_id)) {
-        $is_attachment = true;
-        $attachment_id = $file->file_id;
-
-        $query = "select file_thumbnail.file_id as file_id from file join file_thumbnail on file.id = file_thumbnail.file_id where file.url='$longurl'";
-        $file2 = new File;
-        $file2->query($query);
-        $file2->fetch();
-
-        if (!empty($file2)) {
-            $has_thumb = true;
+    if (!empty($f)) {
+        if ($f->isEnclosure()) {
+            $is_attachment = true;
+            $attachment_id = $f->id;
+        } else {
+            $foe = File_oembed::staticGet('file_id', $f->id);
+            if (!empty($foe)) {
+                // if it has OEmbed info, it's an attachment, too
+                $is_attachment = true;
+                $attachment_id = $f->id;
+
+                $thumb = File_thumbnail::staticGet('file_id', $f->id);
+                if (!empty($thumb)) {
+                    $has_thumb = true;
+                }
+            }
         }
     }
 
@@ -1259,7 +1303,7 @@ function common_cache_key($extra)
         $base_key = common_keyize(common_config('site', 'name'));
     }
 
-    return 'laconica:' . $base_key . ':' . $extra;
+    return 'statusnet:' . $base_key . ':' . $extra;
 }
 
 function common_keyize($str)
@@ -1322,7 +1366,7 @@ function common_shorten_url($long_url)
 
     $curlh = curl_init();
     curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait
-    curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica');
+    curl_setopt($curlh, CURLOPT_USERAGENT, 'StatusNet');
     curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true);
 
     switch($svc) {