X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FUtil%2FStrings.php;h=d6583b9c61c0eb29993ee350d2eb2683e3592c3a;hb=0c738c4f8bd820519a4b46b27f419dbc921721f5;hp=3febe60bd8b4139a29927a5564a49ca2b4b53df2;hpb=5276c28a78e188eb4ec8dcbf87a4dd1ac193c8d7;p=friendica.git diff --git a/src/Util/Strings.php b/src/Util/Strings.php index 3febe60bd8..d6583b9c61 100644 --- a/src/Util/Strings.php +++ b/src/Util/Strings.php @@ -5,17 +5,21 @@ namespace Friendica\Util; +use Friendica\Content\ContactSelector; +use Friendica\Core\Logger; + /** * @brief This class handles string functions */ class Strings { - /** - * @brief Generates a pseudo-random string of hexadecimal characters - * - * @param int $size - * @return string - */ + /** + * @brief Generates a pseudo-random string of hexadecimal characters + * + * @param int $size + * @return string + * @throws \Exception + */ public static function getRandomHex($size = 64) { $byte_size = ceil($size / 2); @@ -28,14 +32,7 @@ class Strings } /** - * This is our primary input filter. - * - * The high bit hack only involved some old IE browser, forget which (IE5/Mac?) - * that had an XSS attack vector due to stripping the high-bit on an 8-bit character - * after cleansing, and angle chars with the high bit set could get through as markup. - * - * This is now disabled because it was interfering with some legitimate unicode sequences - * and hopefully there aren't a lot of those browsers left. + * @brief This is our primary input filter. * * Use this on any text input where angle chars are not valid or permitted * They will be replaced with safer brackets. This may be filtered further @@ -44,7 +41,7 @@ class Strings * @param string $string Input string * @return string Filtered string */ - public static function removeTags($string) + public static function escapeTags($string) { return str_replace(["<", ">"], ['[', ']'], $string); } @@ -56,7 +53,7 @@ class Strings * * @return string */ - public static function escapeTags($string) + public static function escapeHtml($string) { return htmlspecialchars($string, ENT_COMPAT, 'UTF-8', false); } @@ -143,19 +140,20 @@ class Strings return $word; } - /** - * @brief translate and format the networkname of a contact - * - * @param string $network Networkname of the contact (e.g. dfrn, rss and so on) - * @param string $url The contact url - * - * @return string Formatted network name - */ - public static function formatNetworkName($network, $url = 0) + /** + * Translate and format the network name of a contact + * + * @param string $network Network name of the contact (e.g. dfrn, rss and so on) + * @param string $url The contact url + * + * @return string Formatted network name + * @throws \Friendica\Network\HTTPException\InternalServerErrorException + */ + public static function formatNetworkName($network, $url = '') { - if ($network != "") { - if ($url != "") { - $network_name = ''.ContactSelector::networkToName($network, $url).""; + if ($network != '') { + if ($url != '') { + $network_name = '' . ContactSelector::networkToName($network, $url) . ''; } else { $network_name = ContactSelector::networkToName($network); } @@ -165,7 +163,7 @@ class Strings } /** - * @brief Remove intentation from a text + * @brief Remove indentation from a text * * @param string $text String to be transformed. * @param string $chr Optional. Indentation tag. Default tab (\t). @@ -244,12 +242,13 @@ class Strings return $s; } - /** - * @brief Decode Base64 Encoded URL and translate -_ to +/ - * @param string $s URL to decode - * - * @return string Decoded URL - */ + /** + * @brief Decode Base64 Encoded URL and translate -_ to +/ + * @param string $s URL to decode + * + * @return string Decoded URL + * @throws \Exception + */ public static function base64UrlDecode($s) { if (is_array($s)) { @@ -276,92 +275,6 @@ class Strings return base64_decode(strtr($s, '-_', '+/')); } - /** - * @brief Pull out all #hashtags and @person tags from $string. - * - * We also get @person@domain.com - which would make - * the regex quite complicated as tags can also - * end a sentence. So we'll run through our results - * and strip the period from any tags which end with one. - * Returns array of tags found, or empty array. - * - * @param string $string Post content - * - * @return array List of tag and person names - */ - public static function getTags($string) - { - $ret = []; - - // Convert hashtag links to hashtags - $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string); - - // ignore anything in a code block - $string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string); - - // Force line feeds at bbtags - $string = str_replace(['[', ']'], ["\n[", "]\n"], $string); - - // ignore anything in a bbtag - $string = preg_replace('/\[(.*?)\]/sm', '', $string); - - // Match full names against @tags including the space between first and last - // We will look these up afterward to see if they are full names or not recognisable. - - if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) { - foreach ($matches[1] as $match) { - if (strstr($match, ']')) { - // we might be inside a bbcode color tag - leave it alone - continue; - } - - if (substr($match, -1, 1) === '.') { - $ret[] = substr($match, 0, -1); - } else { - $ret[] = $match; - } - } - } - - // Otherwise pull out single word tags. These can be @nickname, @first_last - // and #hash tags. - - if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) { - foreach ($matches[1] as $match) { - if (strstr($match, ']')) { - // we might be inside a bbcode color tag - leave it alone - continue; - } - if (substr($match, -1, 1) === '.') { - $match = substr($match,0,-1); - } - // ignore strictly numeric tags like #1 - if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) { - continue; - } - // try not to catch url fragments - if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) { - continue; - } - $ret[] = $match; - } - } - - return $ret; - } - - /** - * @brief Check for a valid email string - * - * @param string $email_address Email address to be evaluated. - * - * @return boolean Value indicating whether or not the string is a valid email address. - */ - public static function isValidEmail($email_address) - { - return preg_match('/^[_a-zA-Z0-9\-\+]+(\.[_a-zA-Z0-9\-\+]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$/', $email_address); - } - /** * @brief Normalize url * @@ -369,7 +282,7 @@ class Strings * * @return string Normalized URL. */ - public static function normaliseLink($url) // normalise_link() + public static function normaliseLink($url) { $ret = str_replace(['https:', '//www.'], ['http:', '//'], $url); return rtrim($ret, '/'); @@ -382,7 +295,7 @@ class Strings * * @return string normalized OpenId Identity */ - function normaliseOpenID($s) + public static function normaliseOpenID($s) { return trim(str_replace(['http://', 'https://'], ['', ''], $s), '/'); } @@ -402,4 +315,64 @@ class Strings { return (strcasecmp(self::normaliseLink($a), self::normaliseLink($b)) === 0); } + + + /** + * Ensures the provided URI has its query string punctuation in order. + * + * @param string $uri + * @return string + */ + public static function ensureQueryParameter($uri) + { + if (strpos($uri, '?') === false && ($pos = strpos($uri, '&')) !== false) { + $uri = substr($uri, 0, $pos) . '?' . substr($uri, $pos + 1); + } + + return $uri; + } + + + /** + * Check if the trimmed provided string is starting with one of the provided characters + * + * @param string $string + * @param array $chars + * @return bool + */ + public static function startsWith($string, array $chars) + { + $return = in_array(substr(trim($string), 0, 1), $chars); + + return $return; + } + + /** + * Returns the regular expression string to match URLs in a given text + * + * @return string + * @see https://daringfireball.net/2010/07/improved_regex_for_matching_urls + */ + public static function autoLinkRegEx() + { + return '@(?xi) +(??«»“”‘’.] # Domain can\'t start with a . + [^/\s`!()\[\]{};:\'",<>?«»“”‘’]+ # Domain can\'t end with a . + \. + [^/\s`!()\[\]{};:\'".,<>?«»“”‘’]+/? # Followed by a slash + ) + (?: # One or more: + [^\s()<>]+ # Run of non-space, non-()<> + | # or + \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels + | # or + [^\s`!()\[\]{};:\'".,<>?«»“”‘’] # not a space or one of these punct chars + )* +)@'; + } }