return (!is_null(common_current_user()));
}
+/**
+ * Tell whether the current request was referred from this site.
+ *
+ * Compares the host part of the Referer header with the configured
+ * site server name.
+ *
+ * @return bool true only when a Referer header is present and its host
+ *              is identical to common_config('site', 'server')
+ */
+function common_local_referer()
+{
+    // The Referer header is optional (direct visits, privacy settings,
+    // stripped by proxies); without it the request cannot be local-referred.
+    if (empty($_SERVER['HTTP_REFERER'])) {
+        return false;
+    }
+
+    return parse_url($_SERVER['HTTP_REFERER'], PHP_URL_HOST) === common_config('site', 'server');
+}
+
function common_have_session()
{
return (0 != strcmp(session_id(), ''));
$expiration,
$cookiepath,
$server,
- common_config('site', 'ssl')=='always');
+ GNUsocial::useHTTPS());
}
define('REMEMBERME', 'rememberme');
return $email;
}
+/**
+ * Reduce a string to its ASCII letters and digits.
+ *
+ * @param string $str text to filter
+ *
+ * @return string the input with every non-alphanumeric character removed
+ *
+ * @throws Exception if nothing is left after filtering
+ */
+function common_to_alphanumeric($str)
+{
+    // Drop each character outside A-Z, a-z, 0-9 together with any
+    // whitespace that directly follows it.
+    $alnum = preg_replace('/[^A-Za-z0-9]\s*/', '', $str);
+
+    if (strlen($alnum) >= 1) {
+        return $alnum;
+    }
+
+    // Nothing usable survived the filter.
+    throw new Exception('Filtered string was zero-length.');
+}
+
function common_purify($html)
{
- require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';
+ require_once INSTALLDIR.'/extlib/HTMLPurifier/HTMLPurifier.auto.php';
- $config = array('safe' => 1, // means that elements=* means elements=*-applet-embed-iframe-object-script or so
- 'elements' => '*',
- 'deny_attribute' => 'id,style,on*');
+ $cfg = HTMLPurifier_Config::createDefault();
+ $cfg->set('Attr.AllowedRel', ['bookmark', 'directory', 'enclosure', 'home', 'license', 'nofollow', 'payment', 'tag']); // http://microformats.org/wiki/rel
+ $cfg->set('HTML.ForbiddenAttributes', array('style')); // id, on* etc. are already filtered by default
+ $cfg->set('URI.AllowedSchemes', array_fill_keys(common_url_schemes(), true));
+ if (common_config('cache', 'dir')) {
+ $cfg->set('Cache.SerializerPath', common_config('cache', 'dir'));
+ }
- // Remove more elements than what the 'safe' filter gives (elements must be '*' before this)
- // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
+ // Remove more elements than what the default filter removes, default in GNU social are remotely
+ // linked resources such as img, video, audio
+ $forbiddenElements = array();
foreach (common_config('htmlfilter') as $tag=>$filter) {
if ($filter === true) {
- $config['elements'] .= "-{$tag}";
+ $forbiddenElements[] = $tag;
}
}
+ $cfg->set('HTML.ForbiddenElements', $forbiddenElements);
$html = common_remove_unicode_formatting($html);
- $purified = htmLawed($html, $config);
+ $purifier = new HTMLPurifier($cfg);
+ $purified = $purifier->purify($html);
Event::handle('EndCommonPurify', array(&$purified, $html));
return $purified;
if (Event::handle('StartFindMentions', array($sender, $text, &$mentions))) {
// Get the context of the original notice, if any
$origMentions = array();
-
// Does it have a parent notice for context?
if ($parent instanceof Notice) {
foreach ($parent->getAttentionProfiles() as $repliedTo) {
if (!$repliedTo->isPerson()) {
continue;
}
- $origMentions[$repliedTo->getNickname()] = $repliedTo;
+ $origMentions[$repliedTo->id] = $repliedTo;
}
}
continue;
}
- // Try to get a profile for this nickname.
- // Start with conversation context, then go to
- // sender context.
+ // primarily mention the profiles mentioned in the parent
+ $mention_found_in_origMentions = false;
+ foreach($origMentions as $origMentionsId=>$origMention) {
+ if($origMention->getNickname() == $nickname) {
+ $mention_found_in_origMentions = $origMention;
+ // don't mention same twice! the parent might have mentioned
+ // two users with same nickname on different instances
+ unset($origMentions[$origMentionsId]);
+ break;
+ }
+ }
- if ($parent instanceof Notice && $parent->getProfile()->getNickname() === $nickname) {
+ // Try to get a profile for this nickname.
+ // Start with parents mentions, then go to parents sender context
+ if ($mention_found_in_origMentions) {
+ $mentioned = $mention_found_in_origMentions;
+ } else if ($parent instanceof Notice && $parent->getProfile()->getNickname() === $nickname) {
$mentioned = $parent->getProfile();
- } else if (!empty($origMentions) &&
- array_key_exists($nickname, $origMentions)) {
- $mentioned = $origMentions[$nickname];
} else {
// sets to null if no match
$mentioned = common_relative_profile($sender, $nickname);
if ($mentioned instanceof Profile) {
try {
- $url = $mentioned->getUrl();
+ $url = $mentioned->getUri(); // prefer the URI as URL, if it is one.
+ if (!common_valid_http_url($url)) {
+ $url = $mentioned->getUrl();
+ }
} catch (InvalidUrlException $e) {
$url = common_local_url('userbyid', array('id' => $mentioned->getID()));
}
PREG_OFFSET_CAPTURE);
$atmatches = array();
- preg_match_all('/(?:^|\s+)@(' . Nickname::DISPLAY_FMT . ')\b/',
+ // the regexp's "(?!\@)" makes sure it doesn't match the single "@remote" in "@remote@server.com"
+ preg_match_all('/(?:^|\s+)@(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/',
$text,
$atmatches,
PREG_OFFSET_CAPTURE);
return $text;
}
+// Syntax classes for URL schemes. Each is a distinct bit so that several
+// classes can be OR-ed together into one filter mask.
+define('_URL_SCHEME_COLON_DOUBLE_SLASH', 1);
+define('_URL_SCHEME_SINGLE_COLON', 2);
+define('_URL_SCHEME_NO_DOMAIN', 4);
+define('_URL_SCHEME_COLON_COORDINATES', 8);
+
+/**
+ * List the URL scheme names known to this file.
+ *
+ * @param int|null $filter bitmask of _URL_SCHEME_* classes to select;
+ *                         null selects every scheme
+ *
+ * @return array scheme names (without ':' or '://'), in declaration order
+ */
+function common_url_schemes($filter=null)
+{
+    // TODO: move these to $config
+    $schemes = [
+        'http'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'https'     => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'ftp'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'ftps'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'mms'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'rtsp'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'gopher'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'news'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'nntp'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'telnet'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'wais'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'file'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'prospero'  => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'webcal'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'irc'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'ircs'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
+        'aim'       => _URL_SCHEME_SINGLE_COLON,
+        'bitcoin'   => _URL_SCHEME_SINGLE_COLON,
+        'fax'       => _URL_SCHEME_SINGLE_COLON,
+        'jabber'    => _URL_SCHEME_SINGLE_COLON,
+        'mailto'    => _URL_SCHEME_SINGLE_COLON,
+        'tel'       => _URL_SCHEME_SINGLE_COLON,
+        'xmpp'      => _URL_SCHEME_SINGLE_COLON,
+        'magnet'    => _URL_SCHEME_NO_DOMAIN,
+        'geo'       => _URL_SCHEME_COLON_COORDINATES,
+    ];
+
+    // Keep a scheme when no filter was given, or when its syntax class
+    // shares at least one bit with the requested mask.
+    $selected = [];
+    foreach ($schemes as $name => $syntax) {
+        if (is_null($filter) || ($syntax & $filter)) {
+            $selected[] = $name;
+        }
+    }
+
+    return $selected;
+}
+
+
/**
* Find links in the given text and pass them to the given callback function.
*
* @param mixed $arg: optional argument will be passed on to the callback
*/
function common_replace_urls_callback($text, $callback, $arg = null) {
+ $geouri_labeltext_regex = '\pN\pL\-';
+ $geouri_mark_regex = '\-\_\.\!\~\*\\\'\(\)'; // the \\\' is really pretty
+ $geouri_unreserved_regex = '\pN\pL' . $geouri_mark_regex;
+ $geouri_punreserved_regex = '\[\]\:\&\+\$';
+ $geouri_pctencoded_regex = '(?:\%[0-9a-fA-F][0-9a-fA-F])';
+ $geouri_paramchar_regex = $geouri_unreserved_regex . $geouri_punreserved_regex; //FIXME: add $geouri_pctencoded_regex here so it works
+
// Start off with a regex
$regex = '#'.
'(?:^|[\s\<\>\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
'(?:'.
'(?:'. //Known protocols
'(?:'.
- '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|ircs?)://)'.
+ '(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_COLON_DOUBLE_SLASH)) . ')://)'.
'|'.
- '(?:(?:mailto|aim|tel|xmpp):)'.
+ '(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_SINGLE_COLON)) . '):)'.
')'.
'(?:[\pN\pL\-\_\+\%\~]+(?::[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
'(?:'.
')'.
')'.
')'.
- '|(?:(?:magnet):)'. // URLs without domain name
- '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
- '|(?:'. //IPv6
- '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
+ '|(?:'.
+ '(?:' . implode('|', common_url_schemes(_URL_SCHEME_COLON_COORDINATES)) . '):'.
+ // There's an order that must be followed here too, if ;crs= is used, it must precede ;u=
+ // Also 'crsp' (;crs=$crsp) must match $geouri_labeltext_regex
+ // Also 'uval' (;u=$uval) must be a pnum: \-?[0-9]+
+ '(?:'.
+ '(?:[0-9]+(?:\.[0-9]+)?(?:\,[0-9]+(?:\.[0-9]+)?){1,2})'. // 1(.23)?(,4(.56)){1,2}
+ '(?:\;(?:['.$geouri_labeltext_regex.']+)(?:\=['.$geouri_paramchar_regex.']+)*)*'.
+ ')'.
')'.
+ // URLs without domain name, like magnet:?xt=...
+ '|(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_NO_DOMAIN)) . '):(?=\?))'. // zero-length lookahead requires ? after :
+ (common_config('linkify', 'bare_ipv4') // Convert IPv4 addresses to hyperlinks
+ ? '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
+ : '').
+ (common_config('linkify', 'bare_ipv6') // Convert IPv6 addresses to hyperlinks
+ ? '|(?:'. //IPv6
+ '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
+ ')'
+ : '').
(common_config('linkify', 'bare_domains')
? '|(?:'. //DNS
'(?:[\pN\pL\-\_\+\%\~]+(?:\:[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
$r = Router::get();
$path = $r->build($action, $args, $params, $fragment);
- $ssl = common_config('site', 'ssl') === 'always'
- || GNUsocial::isHTTPS()
- || common_is_sensitive($action);
+ $ssl = GNUsocial::useHTTPS();
if (common_config('site','fancy')) {
$url = common_path($path, $ssl, $addSession);
return $url;
}
-function common_is_sensitive($action)
-{
- static $sensitive = array(
- 'login',
- 'register',
- 'passwordsettings',
- 'api',
- 'ApiOAuthRequestToken',
- 'ApiOAuthAccessToken',
- 'ApiOAuthAuthorize',
- 'ApiOAuthPin',
- 'showapplication'
- );
- $ssl = null;
-
- if (Event::handle('SensitiveAction', array($action, &$ssl))) {
- $ssl = in_array($action, $sensitive);
- }
-
- return $ssl;
-}
-
function common_path($relative, $ssl=false, $addSession=true)
{
$pathpart = (common_config('site', 'path')) ? common_config('site', 'path')."/" : '';
- if (($ssl && (common_config('site', 'ssl') === 'sometimes'))
- || GNUsocial::isHTTPS()
- || common_config('site', 'ssl') === 'always') {
+ if ($ssl && GNUsocial::useHTTPS()) {
$proto = 'https';
if (is_string(common_config('site', 'sslserver')) &&
mb_strlen(common_config('site', 'sslserver')) > 0) {
return $proto.'://'.$serverpart.'/'.$pathpart.$relative;
}
+// FIXME: Maybe this should also be able to handle non-fancy URLs with index.php?p=...
+/**
+ * Rewrite a local "index.php/" URL into its fancy form (without "index.php/").
+ *
+ * This is a hacky fix to make URIs generated with "index.php/" match against
+ * locally stored URIs without that. So for example if the remote site is looking
+ * up the webfinger for some user and for some reason knows about https://some.example/user/1
+ * but we locally store and report only https://some.example/index.php/user/1 then they would
+ * dismiss the profile for not having an identified alias.
+ *
+ * There are various live instances where these issues occur, for various reasons.
+ * Most of them being users fiddling with configuration while already having
+ * started federating (distributing the URI to other servers) or maybe manually
+ * editing the local database.
+ *
+ * @param string $url http(s) URL on the configured site server and path
+ *
+ * @return string the same URL with the "index.php/" segment removed
+ *
+ * @throws Exception if $url is not a local "index.php/" URL
+ */
+function common_fake_local_fancy_url($url)
+{
+    // common_path() builds URLs as server . '/' . path . '/', so the
+    // configured path carries no surrounding slashes. Put them back here,
+    // otherwise sites installed in a subdirectory can never match.
+    $site_path = trim((string) common_config('site', 'path'), '/');
+    $site_path = ($site_path === '') ? '/' : '/' . $site_path . '/';
+
+    if (!preg_match(
+        // [1] protocol part, we can only rewrite http/https anyway.
+        '/^(https?:\/\/)' .
+        // [2] site name.
+        // FIXME: Dunno how this acts if we're aliasing ourselves with a .onion domain etc.
+        '(' . preg_quote(common_config('site', 'server'), '/') . ')' .
+        // [3] site path wrapped in slashes, or just '/' if the path is empty
+        '(' . preg_quote($site_path, '/') . ')' .
+        // [4] + [5] extract index.php (+ possible leading double /) and the rest of the URL separately.
+        '(\/?index\.php\/)(.*)$/', $url, $matches)) {
+        // if preg_match failed to match
+        throw new Exception('No known change could be made to the URL.');
+    }
+
+    // now reconstruct the URL with everything except the "index.php/" part
+    return $matches[1] . $matches[2] . $matches[3] . $matches[5];
+}
+
+// FIXME: Maybe this should also be able to handle non-fancy URLs with index.php?p=...
+/**
+ * Rewrite a local fancy URL into its "index.php/" form.
+ *
+ * This is a hacky fix to make URIs NOT generated with "index.php/" match against
+ * locally stored URIs WITH that. The reverse of common_fake_local_fancy_url().
+ *
+ * It will also "repair" index.php URLs with multiple / prepended. Like https://some.example///index.php/user/1
+ *
+ * @param string $url http(s) URL on the configured site server and path
+ *
+ * @return string the same URL with "index.php/" injected after the site path
+ *
+ * @throws Exception if $url is not local, or already has "index.php/"
+ *                   directly after the site path
+ */
+function common_fake_local_nonfancy_url($url)
+{
+    // common_path() builds URLs as server . '/' . path . '/', so the
+    // configured path carries no surrounding slashes. Put them back here,
+    // otherwise sites installed in a subdirectory can never match.
+    $site_path = trim((string) common_config('site', 'path'), '/');
+    $site_path = ($site_path === '') ? '/' : '/' . $site_path . '/';
+
+    if (!preg_match(
+        // [1] protocol part, we can only rewrite http/https anyway.
+        '/^(https?:\/\/)' .
+        // [2] site name.
+        // FIXME: Dunno how this acts if we're aliasing ourselves with a .onion domain etc.
+        '(' . preg_quote(common_config('site', 'server'), '/') . ')' .
+        // [3] site path wrapped in slashes, or just '/' if the path is empty
+        '(' . preg_quote($site_path, '/') . ')' .
+        // [4] should be empty (might contain one or more / and then maybe also index.php). Will be overwritten.
+        // [5] will have the extracted actual URL part (besides site path)
+        // The dots are escaped so only a literal "index.php/" is recognised.
+        '((?!index\.php\/)\/*(?:index\.php\/)?)(.*)$/', $url, $matches)) {
+        // if preg_match failed to match
+        throw new Exception('No known change could be made to the URL.');
+    }
+
+    $matches[4] = 'index.php/'; // inject the index.php/ rewritethingy
+
+    // remove the first element, which is the full matching string
+    array_shift($matches);
+    return implode('', $matches);
+}
+
function common_inject_session($url, $serverpart = null)
{
if (!common_have_session()) {
function common_valid_http_url($url, $secure=false)
{
+ if (empty($url)) {
+ return false;
+ }
+
// If $secure is true, only allow https URLs to pass
// (if false, we use '?' in 'https?' to say the 's' is optional)
$regex = $secure ? '/^https$/' : '/^https?$/';