define('NOTICE_INBOX_SOURCE_PROFILE_TAG', 5);
define('NOTICE_INBOX_SOURCE_GATEWAY', -1);
+/**
+ * StatusNet used this string as its set of valid URL path characters: '\pN\pL\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\'\@'
+ * Some of those characters are troublesome when auto-linking plain text, for example the trailing ")" in "http://some.com/)".
+ * Unusual characters should be URL-encoded instead; the character lists below are regex fragments and are not definitive.
+ */
+define('URL_REGEX_VALID_PATH_CHARS', '\pN\pL\,\!\.\:\-\_\+\/\@\=\;\%\~\*'); // Unicode numbers and letters plus escaped punctuation; omits the parentheses, ampersand, dollar sign and apostrophe of the StatusNet list
+define('URL_REGEX_VALID_QSTRING_CHARS', URL_REGEX_VALID_PATH_CHARS . '\&'); // path characters plus the ampersand
+define('URL_REGEX_VALID_FRAGMENT_CHARS', URL_REGEX_VALID_QSTRING_CHARS . '\?\#'); // query-string characters plus question mark and hash
+define('URL_REGEX_EXCLUDED_END_CHARS', '\?\.\,\!\#\:\''); // characters that must not be treated as part of a URL when they directly follow it
+define('URL_REGEX_DOMAIN_NAME', '(?:(?!-)[A-Za-z0-9\-]{1,63}(?<!-)\.)+[A-Za-z]{2,10}'); // one or more dot-terminated labels (1-63 ASCII alphanumerics or hyphens, no leading or trailing hyphen) followed by a 2-10 letter TLD
+
// append our extlib dir as the last-resort place to find libs
set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/extlib/');
static function extractWebfingerIds($text)
{
$wmatches = array();
- $result = preg_match_all('/(?:^|\s+)@((?:\w+[\w\-\_\.]?)*(?:[\w\-\_\.]*\w+)@(?:(?!-)[A-Za-z0-9\-]{1,63}(?<!-)\.)+[A-Za-z]{2,10})/',
+ $result = preg_match_all('/(?:^|\s+)@((?:\w+[\w\-\_\.]?)*(?:[\w\-\_\.]*\w+)@'.URL_REGEX_DOMAIN_NAME.')/',
$text,
$wmatches,
PREG_OFFSET_CAPTURE);
static function extractUrlMentions($text)
{
$wmatches = array();
- $result = preg_match_all('!(?:^|\s+)@((?:\w+\.)*\w+(?:\w+\-\w+)*\.\w+(?:/\w+)*)!',
+ $result = preg_match_all('!(?:^|\s+)@('.URL_REGEX_DOMAIN_NAME.'(?:/\w+)*)!', // group 1 = domain name plus optional /word path segments; its opening "(" must balance the ")" before the closing delimiter
$text,
$wmatches,
PREG_OFFSET_CAPTURE);