3 * @file src/Util/Strings.php
6 namespace Friendica\Util;
9 * @brief This class handles string functions
/**
 * @brief Generates a pseudo-random string of hexadecimal characters
 *
 * @param int $size Number of hexadecimal characters to return. Default 64.
 *
 * @return string Hexadecimal string cut to $size characters
 * @throws \Exception If random_bytes() cannot gather sufficient entropy
 */
public static function getRandomHex($size = 64)
{
	// Every random byte becomes two hex digits, so request half as many
	// bytes (rounded up) and cut off the surplus digit for odd sizes.
	$hex = bin2hex(random_bytes(ceil($size / 2)));

	return substr($hex, 0, $size);
}
/**
 * This is our primary input filter.
 *
 * Use this on any text input where angle chars are not valid or permitted.
 * Each "<" becomes "[" and each ">" becomes "]". The result may be filtered
 * further if square brackets are not allowed either.
 *
 * Historical note: an earlier "high bit hack" guarded against some old IE
 * browser (IE5/Mac?) that stripped the high bit of 8-bit characters after
 * cleansing, letting angle chars with the high bit set through as markup.
 * It is disabled because it interfered with legitimate unicode sequences.
 *
 * @param string $string Input string
 * @return string Filtered string
 */
public static function removeTags($string) // notags()
{
	$angle_chars = ["<", ">"];
	$safe_chars  = ['[', ']'];

	return str_replace($angle_chars, $safe_chars, $string);
}
/**
 * @brief Use this on "body" or "content" input where angle chars shouldn't be removed,
 * and allow them to be safely displayed.
 *
 * Double quotes are converted, single quotes are left alone (ENT_COMPAT),
 * and entities already present in the input are not encoded a second time.
 *
 * @param string $string Input string
 * @return string HTML-escaped string
 */
public static function escapeTags($string) // escape_tags()
{
	$quote_style   = ENT_COMPAT;
	$charset       = 'UTF-8';
	$double_encode = false;

	return htmlspecialchars($string, $quote_style, $charset, $double_encode);
}
65 * @brief Generate a string that's random, but usually pronounceable. Used to generate initial passwords
67 * @param int $len length
// Builds a word by picking random fragments in turn from a vowel table and a consonant table.
// NOTE(review): this listing is a lossy extract - the embedded original line numbers
// jump (71 -> 77 -> 79 -> 85 ...), so several statements of this method are not visible here.
// NOTE(review): mt_rand() is not a cryptographically secure generator, yet the docblock says
// the result is used for initial passwords - confirm whether random_int() should be used.
71 public static function getRandomName($len) // autoname()
// Vowel fragments. Entries such as 'a' and 'e' are listed more than once.
77 $vowels = ['a', 'a', 'ai', 'au', 'e', 'e', 'e', 'ee', 'ea', 'i', 'ie', 'o', 'ou', 'u'];
// One-in-six chance branch (mt_rand(0, 5) yields 0..5); its body is not visible in this extract.
79 if (mt_rand(0, 5) == 4) {
// Fragments of a consonant list whose opening line is not visible - presumably $cons,
// which is merged with $midcons further down.
85 'c', 'ch', 'cl', 'cr',
88 'g', 'gh', 'gl', 'gr',
91 'k', 'kh', 'kl', 'kr',
95 'p', 'ph', 'pl', 'pr',
98 's' ,'sc', 'sh', 'sm', 'sp', 'st',
// Consonant clusters that are only added to the table once a vowel table has been used (see below).
106 $midcons = ['ck', 'ct', 'gn', 'ld', 'lf', 'lm', 'lt', 'mb', 'mm', 'mn', 'mp',
107 'nd', 'ng', 'nk', 'nt', 'rn', 'rp', 'rt'];
// Clusters a finished word must not end with; checked after the word is truncated.
109 $noend = ['bl', 'br', 'cl', 'cr', 'dr', 'fl', 'fr', 'gl', 'gr',
110 'kh', 'kl', 'kr', 'mn', 'pl', 'pr', 'rh', 'tr', 'qu', 'wh', 'q'];
// Random value 0..2; how it selects the first table is not visible here.
112 $start = mt_rand(0, 2);
// Up to $len rounds, each drawing a uniformly random index into the current $table.
121 for ($x = 0; $x < $len; $x ++) {
122 $r = mt_rand(0, count($table) - 1);
// After a round on the vowel table, switch to consonants plus mid-word clusters.
125 if ($table == $vowels) {
126 $table = array_merge($cons, $midcons);
// Fragments can be longer than one character, so cut the word back to $len bytes.
133 $word = substr($word, 0, $len);
// Regenerate recursively when the word ends in a forbidden cluster. The strict ">" means
// a word that consists solely of such a cluster is kept.
135 foreach ($noend as $noe) {
136 $noelen = strlen($noe);
137 if ((strlen($word) > $noelen) && (substr($word, -$noelen) == $noe)) {
138 $word = self::getRandomName($len);
147 * @brief Translate and format the network name of a contact
149 * @param string $network Network name of the contact (e.g. dfrn, rss and so on)
150 * @param string $url The contact url
152 * @return string Formatted network name
// Returns the display name of a network, optionally wrapped in a link to the contact URL.
// NOTE(review): $url defaults to the integer 0 although the docblock declares it a string.
154 public static function formatNetworkName($network, $url = 0) // format_network_name()
// Only a non-empty network name (loose comparison) is processed.
156 if ($network != "") {
// Linked form: the name from ContactSelector::networkToName() inside an anchor.
// NOTE(review): $url is concatenated into the href attribute without escaping -
// confirm that callers only pass trusted or already-escaped URLs.
158 $network_name = '<a href="'.$url.'">'.ContactSelector::networkToName($network, $url)."</a>";
// Plain form without a link.
// NOTE(review): the condition choosing between the linked and the plain form is not
// visible in this extract (original lines 157 and 159 are missing).
160 $network_name = ContactSelector::networkToName($network);
163 return $network_name;
/**
 * @brief Remove indentation from a text
 *
 * @param string   $text  String to be transformed.
 * @param string   $chr   Optional. Regular expression fragment matching a single
 *                        indentation character. Default "[\t ]" (tab or space).
 * @param int|null $count Optional. Number of indentation characters to strip from
 *                        the start of every line. Default null, which means: use
 *                        the indentation width of the first non-empty line.
 *
 * @return string Transformed string.
 */
public static function deindent($text, $chr = "[\t ]", $count = NULL)
{
	$lines = explode("\n", $text);

	if (is_null($count)) {
		// Nothing is stripped when the text holds no non-empty line. Iterating
		// instead of indexing avoids reading past the end of $lines in that case.
		$count = 0;

		foreach ($lines as $line) {
			if (strlen($line) == 0) {
				continue;
			}

			// The first non-empty line defines the indentation width.
			$m = [];
			preg_match("|^" . $chr . "*|", $line, $m);
			$count = isset($m[0]) ? strlen($m[0]) : 0;
			break;
		}
	}

	// Strip exactly $count indentation characters; lines with less indentation
	// do not match the pattern and are left untouched.
	foreach ($lines as $k => $line) {
		$lines[$k] = preg_replace("|^" . $chr . "{" . $count . "}|", "", $line);
	}

	return implode("\n", $lines);
}
/**
 * @brief Get byte size returned in a Data Measurement (KB, MB, GB)
 *
 * Negative input is treated as zero; values beyond the terabyte range
 * are still expressed in TB.
 *
 * @param int $bytes     The number of bytes to be measured
 * @param int $precision Optional. Number of decimal places. Default 2.
 *
 * @return string Size with measured units.
 */
public static function formatBytes($bytes, $precision = 2)
{
	$units       = ['B', 'KB', 'MB', 'GB', 'TB'];
	$largest_pow = count($units) - 1;

	$bytes = max($bytes, 0);

	// log(0) is undefined, so a zero size stays at exponent 0 ("B").
	$logarithm = $bytes ? log($bytes) : 0;

	// Exponent of 1024 that fits into the value, capped at the largest unit.
	$pow = min(floor($logarithm / log(1024)), $largest_pow);

	$scaled = $bytes / pow(1024, $pow);

	return round($scaled, $precision) . ' ' . $units[$pow];
}
/**
 * @brief Protect percent characters in sprintf calls
 *
 * Every "%" is doubled so that sprintf() prints it literally instead of
 * reading it as the start of a conversion specification.
 *
 * @param string $s String to transform.
 *
 * @return string Transformed string.
 */
public static function protectSprintf($s) // protect_sprintf()
{
	$literal_percent = '%';
	$escaped_percent = '%%';

	return str_replace($literal_percent, $escaped_percent, $s);
}
/**
 * @brief Base64 Encode URL and translate +/ to -_ Optionally strip padding.
 *
 * @param string  $s             URL to encode
 * @param boolean $strip_padding Optional. Remove the "=" padding. Default false
 *
 * @return string Encoded URL
 */
public static function base64UrlEncode($s, $strip_padding = false) //base64url_encode()
{
	// Swap the two characters of the base64 alphabet that are not URL-safe.
	$encoded = strtr(base64_encode($s), '+/', '-_');

	return $strip_padding ? str_replace('=', '', $encoded) : $encoded;
}
248 * @brief Decode Base64 Encoded URL and translate -_ to +/
249 * @param string $s URL to decode
251 * @return string Decoded URL
// Reverses the URL-safe base64 alphabet ("-_" back to "+/") and decodes the result.
// NOTE(review): the snake_case method name differs from its counterpart base64UrlEncode();
// renaming would break callers, so it is only flagged here.
253 public static function base64url_decode($s) // base64url_decode()
// Logs the complete backtrace for illegal input.
// NOTE(review): the condition guarding this call is not visible in this extract.
256 Logger::log('base64url_decode: illegal input: ' . print_r(debug_backtrace(), true));
// The following lines belong to a disabled block that would re-add stripped "=" padding.
261 * // Placeholder for new rev of salmon which strips base64 padding.
262 * // PHP base64_decode handles the un-padded input without requiring this step
263 * // Uncomment if you find you need it.
266 * if (!strpos($s,'=')) {
// Map the URL-safe characters back to the standard alphabet before decoding.
276 return base64_decode(strtr($s, '-_', '+/'));
280 * @brief Pull out all #hashtags and @person tags from $string.
282 * We also get @person@domain.com - which would make
283 * the regex quite complicated as tags can also
284 * end a sentence. So we'll run through our results
285 * and strip the period from any tags which end with one.
286 * Returns array of tags found, or empty array.
288 * @param string $string Post content
290 * @return array List of tag and person names
// Collects tag candidates from post content after stripping bbcode, in two passes:
// "@first last" style names, then single-word tags starting with "!", "#" or "@".
// NOTE(review): this listing is a lossy extract - the embedded original line numbers show
// gaps, so some statements (e.g. the bodies of several "if" blocks) are not visible here.
292 public static function getTags($string) // get_tags()
// The link target is dropped; only the link text (group 2) is kept behind the "#".
296 // Convert hashtag links to hashtags
297 $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
// This pattern has no "i" modifier, so only lower-case [code] blocks are removed.
299 // ignore anything in a code block
300 $string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);
// A line feed is inserted before every "[" and after every "]".
302 // Force line feeds at bbtags
303 $string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
// Removes every remaining "[...]" span, non-greedy.
305 // ignore anything in a bbtag
306 $string = preg_replace('/\[(.*?)\]/sm', '', $string);
// First pass: "@word word" followed by a delimiter or the end; group 1 is the candidate.
308 // Match full names against @tags including the space between first and last
309 // We will look these up afterward to see if they are full names or not recognisable.
311 if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
312 foreach ($matches[1] as $match) {
313 if (strstr($match, ']')) {
314 // we might be inside a bbcode color tag - leave it alone
// A trailing period is treated as sentence punctuation and cut off before storing.
318 if (substr($match, -1, 1) === '.') {
319 $ret[] = substr($match, 0, -1);
// Second pass: single-word tags beginning with "!", "#" or "@".
326 // Otherwise pull out single word tags. These can be @nickname, @first_last
329 if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
330 foreach ($matches[1] as $match) {
331 if (strstr($match, ']')) {
332 // we might be inside a bbcode color tag - leave it alone
// Trailing period removed in place here, before the checks below run.
335 if (substr($match, -1, 1) === '.') {
336 $match = substr($match,0,-1);
338 // ignore strictly numeric tags like #1
339 if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
// Inspects the character right before the first occurrence of the tag in $string.
// strpos() is used as a truth value, so a tag at offset 0 skips this check.
// NOTE(review): the range "A-z" also covers the ASCII characters between "Z" and "a"
// ("[", "\", "]", "^", "_", "`") - probably meant "A-Z"; confirm before changing.
342 // try not to catch url fragments
343 if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) {
/**
 * @brief Check for a valid email string
 *
 * The address must consist of a local part made of letters, digits, "_", "-"
 * and "+" (optionally in dot-separated segments), an "@", and a host name
 * with at least one dot.
 *
 * @param string $email_address Email address to be evaluated.
 *
 * @return boolean Value indicating whether or not the string is a valid email address.
 */
public static function isValidEmail($email_address) // valid_email()
{
	$pattern = '/^[_a-zA-Z0-9\-\+]+(\.[_a-zA-Z0-9\-\+]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$/';

	// preg_match() yields 1, 0 or false; collapse that into the documented boolean.
	return preg_match($pattern, $email_address) === 1;
}
/**
 * @brief Normalize url
 *
 * Replaces every "https:" with "http:" and every "//www." with "//",
 * wherever they occur in the string, then removes trailing slashes.
 *
 * @param string $url URL to be normalized.
 *
 * @return string Normalized URL.
 */
public static function normaliseLink($url) // normalise_link()
{
	$variants  = ['https:', '//www.'];
	$canonical = ['http:', '//'];

	$normalised = str_replace($variants, $canonical, $url);

	return rtrim($normalised, '/');
}
/**
 * @brief Normalize OpenID identity
 *
 * Removes every "http://" and "https://" from the identity and trims
 * slashes from both ends.
 *
 * @param string $s OpenID Identity
 *
 * @return string normalized OpenId Identity
 */
public static function normaliseOpenID($s) // normalize_openid()
{
	// Declared public static like the other helpers of this class, so the
	// usual static call keeps working on PHP 8.
	$without_scheme = str_replace(['http://', 'https://'], '', $s);

	return trim($without_scheme, '/');
}
/**
 * @brief Compare two URLs to see if they are the same, but ignore
 * slight but hopefully insignificant differences such as if one
 * is https and the other isn't, or if one is www.something and
 * the other isn't - and also ignore case differences.
 *
 * @param string $a first url
 * @param string $b second url
 * @return boolean True if the URLs match, otherwise False
 */
public static function compareLink($a, $b) // link_compare()
{
	// Use this class's own normaliser rather than the legacy global
	// normalise_link() function it replaces.
	return (strcasecmp(self::normaliseLink($a), self::normaliseLink($b)) === 0);
}