fix a warning and suggestions for pullrequest

[friendica.git] / src / Util / Strings.php
diff --git a/src/Util/Strings.php b/src/Util/Strings.php

index 5efc214afc95e3552948c37e3152c782f7d9cc7a..2405fbababcfdbcd8d6788f4ed28040c6b954c4f 100644 (file)
--- a/src/Util/Strings.php
+++ b/src/Util/Strings.php
@@ -1,51 +1,564 @@
  <?php
  /**
- * @file src/Util/Strings.php
+ * @copyright Copyright (C) 2010-2023, the Friendica project
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
   */
  
  namespace Friendica\Util;
  
+use Friendica\Content\ContactSelector;
+use Friendica\Core\Logger;
+use Friendica\Core\System;
+use ParagonIE\ConstantTime\Base64;
+
  /**
- * @brief This class contains methods to modify/transform strings.
+ * This class handles string functions
   */
  class Strings
  {
-    /**
-        * escape text ($str) for XML transport
-        * @param string $str
-        * @return string Escaped text.
+       /**
+        * Generates a pseudo-random string of hexadecimal characters
+        *
+        * Every random byte yields two hexadecimal characters, so half of $size (rounded up)
+        * bytes are requested and the result is cut down to $size for odd sizes.
+        *
+        * @param int $size Size of string (default: 64)
+        *
+        * @return string Pseudo-random string
+        * @throws \Exception
+        */
+       public static function getRandomHex(int $size = 64): string
+       {
+               // Integer arithmetic keeps the byte count an int, ceil() would return a float
+               $byte_size = intdiv($size + 1, 2);
+
+               return substr(bin2hex(random_bytes($byte_size)), 0, $size);
+       }
+
+       /**
+        * Checks, if the given string is a valid hexadecimal code
+        *
+        * A valid code is non-empty, consists only of the characters 0-9 and a-f (case-insensitive)
+        * and has an even length.
+        *
+        * @param string $hexCode
+        * @return bool
+        */
+       public static function isHex(string $hexCode): bool
+       {
+               // Pairs of hex digits enforce the even length; the D modifier stops "$" from
+               // matching before a trailing newline, which would let "abc\n" pass
+               return preg_match('/^(?:[a-f0-9]{2})+$/iD', $hexCode) === 1;
+       }
+
+       /**
+        * Converts the HTML special characters of a text to entities so that it can be displayed safely.
+        *
+        * Meant for "body" or "content" input whose angle chars have to be kept instead of being
+        * removed. Entities that already exist in the text aren't encoded a second time.
+        *
+        * @param string $string Text to escape
+        *
+        * @return string Escaped text
+        */
+       public static function escapeHtml($string)
+       {
+               $escaped = htmlspecialchars($string, ENT_COMPAT, 'UTF-8', false);
+
+               return $escaped;
+       }
+
+       /**
+        * Generate a string that's random, but usually pronounceable. Used to generate initial passwords
+        *
+        * The word is built by alternating between vowel and consonant groups and is cut to the
+        * requested length. A word that ends with one of the groups listed in $noend is discarded
+        * and generated anew.
+        *
+        * @param int $len      length
+        * @return string Random word of $len characters, empty if $len isn't positive
+        * @throws \Exception if random_int() can't find a source of randomness
+        */
+       public static function getRandomName(int $len): string
+       {
+               if ($len <= 0) {
+                       return '';
+               }
+
+               $vowels = ['a', 'a', 'ai', 'au', 'e', 'e', 'e', 'ee', 'ea', 'i', 'ie', 'o', 'ou', 'u'];
+
+               // random_int() is used throughout since the result serves as a password
+               if (random_int(0, 5) == 4) {
+                       $vowels[] = 'y';
+               }
+
+               $cons = [
+                       'b', 'bl', 'br',
+                       'c', 'ch', 'cl', 'cr',
+                       'd', 'dr',
+                       'f', 'fl', 'fr',
+                       'g', 'gh', 'gl', 'gr',
+                       'h',
+                       'j',
+                       'k', 'kh', 'kl', 'kr',
+                       'l',
+                       'm',
+                       'n',
+                       'p', 'ph', 'pl', 'pr',
+                       'qu',
+                       'r', 'rh',
+                       's', 'sc', 'sh', 'sm', 'sp', 'st',
+                       't', 'th', 'tr',
+                       'v',
+                       'w', 'wh',
+                       'x',
+                       'z', 'zh'
+               ];
+
+               $midcons = [
+                       'ck', 'ct', 'gn', 'ld', 'lf', 'lm', 'lt', 'mb', 'mm', 'mn', 'mp',
+                       'nd', 'ng', 'nk', 'nt', 'rn', 'rp', 'rt'
+               ];
+
+               $noend = [
+                       'bl', 'br', 'cl', 'cr', 'dr', 'fl', 'fr', 'gl', 'gr',
+                       'kh', 'kl', 'kr', 'mn', 'pl', 'pr', 'rh', 'tr', 'qu', 'wh', 'q'
+               ];
+
+               // Consonant groups allowed after the first group of the word
+               $innerCons = array_merge($cons, $midcons);
+
+               // One word in three starts with a vowel group
+               $useVowels = (random_int(0, 2) == 0);
+               $table     = $useVowels ? $vowels : $cons;
+
+               $word = '';
+
+               for ($x = 0; $x < $len; $x++) {
+                       $word .= $table[random_int(0, count($table) - 1)];
+
+                       // Alternate between vowel and consonant groups
+                       $useVowels = !$useVowels;
+                       $table     = $useVowels ? $vowels : $innerCons;
+               }
+
+               // Groups can be longer than one character, so the word is cut to the requested length
+               $word = substr($word, 0, $len);
+
+               foreach ($noend as $noe) {
+                       $noelen = strlen($noe);
+                       if ((strlen($word) > $noelen) && (substr($word, -$noelen) == $noe)) {
+                               // Unwanted ending, try again with a new word
+                               $word = self::getRandomName($len);
+                               break;
+                       }
+               }
+
+               return $word;
+       }
+
+       /**
+        * Translate and format the network name of a contact
+        *
+        * The name is taken from ContactSelector::networkToName(). If a URL is given, the name is
+        * wrapped in a link to this URL. An empty network yields an empty string.
+        *
+        * @param string $network Network name of the contact (e.g. dfrn, rss and so on)
+        * @param string $url     The contact url
+        *
+        * @return string Formatted network name
+        * @throws \Friendica\Network\HTTPException\InternalServerErrorException
          */
-       public static function escape($str)
+       public static function formatNetworkName(string $network, string $url = ''): string
         {
-               $buffer = htmlspecialchars($str, ENT_QUOTES, "UTF-8");
-               $buffer = trim($buffer);
+               if ($network != '') {
+                       if ($url != '') {
+                               // NOTE(review): $url is concatenated into the href attribute without escaping,
+                               // confirm that callers only pass trusted or already escaped URLs
+                               $network_name = '<a href="' . $url . '">' . ContactSelector::networkToName($network, $url) . '</a>';
+                       } else {
+                               $network_name = ContactSelector::networkToName($network);
+                       }
  
-               return $buffer;
+                       return $network_name;
+               }
+
+               return '';
         }
  
         /**
-        * undo an escape
-        * @param string $s xml escaped text
-        * @return string unescaped text
+        * Remove indentation from a text
+        *
+        * @param string $text  String to be transformed.
+        * @param string $chr   Optional. Indentation tag. Default tab (\t).
+        * @param int    $count Optional. Default null.
+        *
+        * @return string               Transformed string.
          */
-       public static function unescape($s)
+       public static function deindent(string $text, string $chr = "[\t ]", int $count = null): string
         {
-               $ret = htmlspecialchars_decode($s, ENT_QUOTES);
-               return $ret;
+               $lines = explode("\n", $text);
+
+               if (is_null($count)) {
+                       $m = [];
+                       $k = 0;
+                       while ($k < count($lines) && strlen($lines[$k]) == 0) {
+                               $k++;
+                       }
+                       preg_match("|^" . $chr . "*|", $lines[$k], $m);
+                       $count = strlen($m[0]);
+               }
+
+               for ($k = 0; $k < count($lines); $k++) {
+                       $lines[$k] = preg_replace("|^" . $chr . "{" . $count . "}|", "", $lines[$k]);
+               }
+
+               return implode("\n", $lines);
         }
  
         /**
-        * apply escape() to all values of array $val, recursively
-        * @param array $val
-        * @return array
+        * Get byte size returned in a Data Measurement (B, KiB, MiB, GiB, TiB)
+        *
+        * The amount is divided by the largest power of 1024 that fits and rounded to the given
+        * precision. Negative amounts are handled like 0.
+        *
+        * @param int $bytes    The number of bytes to be measured
+        * @param int $precision        Optional. Number of decimal digits to round to. Default 2.
+        *
+        * @return string       Size with measured units.
          */
-       public static function arrayEscape($val)
+       public static function formatBytes(int $bytes, int $precision = 2): string
         {
-               if (is_bool($val)) {
-                       return $val?"true":"false";
-               } elseif (is_array($val)) {
-                       return array_map('XML::arrayEscape', $val);
+               // If this method is called for an infinite (== unlimited) amount of bytes:
+               // NOTE(review): $bytes is declared as int while INF is a float, so this branch looks
+               // unreachable - confirm
+               if ($bytes == INF) {
+                       return INF;
                 }
-               return self::escape((string) $val);
+
+               $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
+               // Negative amounts are treated as 0
+               $bytes = max($bytes, 0);
+               // Exponent of 1024 selecting the unit, 0 bytes stay in the first unit
+               $pow = floor(($bytes ? log($bytes) : 0) / log(1024));
+               // Amounts beyond the last unit are expressed in the last unit
+               $pow = min($pow, count($units) - 1);
+               $bytes /= pow(1024, $pow);
+
+               return round($bytes, $precision) . ' ' . $units[$pow];
+       }
+
+       /**
+        * Doubles every percent character of a string so that it can be used in sprintf calls
+        *
+        * @param string $s String to transform.
+        * @return string       Transformed string.
+        */
+       public static function protectSprintf(string $s): string
+       {
+               $parts = explode('%', $s);
+
+               return implode('%%', $parts);
+       }
+
+       /**
+        * Base64 encodes a URL using the characters -_ in place of +/. The padding can be stripped.
+        *
+        * @param string $s                                     URL to encode
+        * @param boolean $strip_padding        Optional. Default false
+        * @return string       Encoded URL
+        * @see https://web.archive.org/web/20160506073138/http://salmon-protocol.googlecode.com:80/svn/trunk/draft-panzer-magicsig-01.html#params
+        */
+       public static function base64UrlEncode(string $s, bool $strip_padding = false): string
+       {
+               $encoded = $strip_padding ? Base64::encodeUnpadded($s) : Base64::encode($s);
+
+               return strtr($encoded, '+/', '-_');
         }
+
+       /**
+        * Decodes a Base64 encoded URL that uses the characters -_ in place of +/
+        *
+        * @param string $s URL to decode
+        * @return string       Decoded URL
+        * @throws \Exception
+        * @see https://web.archive.org/web/20160506073138/http://salmon-protocol.googlecode.com:80/svn/trunk/draft-panzer-magicsig-01.html#params
+        */
+       public static function base64UrlDecode(string $s): string
+       {
+               // Back to the standard alphabet before decoding
+               $standard = strtr($s, '-_', '+/');
+
+               return Base64::decode($standard);
+       }
+
+       /**
+        * Normalize url
+        *
+        * Replaces "https:" with "http:", removes a "www." that follows "//" and strips trailing slashes.
+        *
+        * @param string $url   URL to be normalized.
+        * @return string       Normalized URL.
+        */
+       public static function normaliseLink(string $url): string
+       {
+               $normalized = str_replace('https:', 'http:', $url);
+               $normalized = str_replace('//www.', '//', $normalized);
+
+               return rtrim($normalized, '/');
+       }
+
+       /**
+        * Normalize OpenID identity
+        *
+        * Removes "http://" and "https://" and strips slashes from both ends.
+        *
+        * @param string $s OpenID Identity
+        * @return string       normalized OpenId Identity
+        */
+       public static function normaliseOpenID(string $s): string
+       {
+               $withoutScheme = str_replace(['http://', 'https://'], '', $s);
+
+               return trim($withoutScheme, '/');
+       }
+
+       /**
+        * Compare two URLs to see if they are the same, but ignore
+        * slight but hopefully insignificant differences such as if one
+        * is https and the other isn't, or if one is www.something and
+        * the other isn't - and also ignore case differences.
+        *
+        * Both URLs are passed through normaliseLink() before the comparison.
+        *
+        * @param string $a first url
+        * @param string $b second url
+        * @return boolean True if the URLs match, otherwise False
+        *
+        */
+       public static function compareLink(string $a, string $b): bool
+       {
+               $first  = self::normaliseLink($a);
+               $second = self::normaliseLink($b);
+
+               return strcasecmp($first, $second) === 0;
+       }
+
+       /**
+        * Ensures the provided URI has its query string punctuation in order.
+        *
+        * If the URI has no question mark, the first ampersand is replaced by one.
+        *
+        * @param string $uri
+        * @return string
+        */
+       public static function ensureQueryParameter(string $uri): string
+       {
+               // Nothing to do if there already is a question mark
+               if (strpos($uri, '?') !== false) {
+                       return $uri;
+               }
+
+               $pos = strpos($uri, '&');
+               if ($pos === false) {
+                       return $uri;
+               }
+
+               return substr_replace($uri, '?', $pos, 1);
+       }
+
+       /**
+        * Check if the first character of the trimmed provided string is one of the provided characters
+        *
+        * @param string $string
+        * @param array $chars
+        *
+        * @return bool
+        */
+       public static function startsWithChars(string $string, array $chars): bool
+       {
+               $firstChar = substr(trim($string), 0, 1);
+
+               return in_array($firstChar, $chars);
+       }
+
+       /**
+        * Check if the first string starts with the second
+        *
+        * Only the leading strlen($start) bytes of $string take part in the comparison.
+        *
+        * @see http://maettig.com/code/php/php-performance-benchmarks.php#startswith
+        * @param string $string
+        * @param string $start
+        * @return bool
+        */
+       public static function startsWith(string $string, string $start): bool
+       {
+               $length = strlen($start);
+
+               return substr_compare($string, $start, 0, $length) === 0;
+       }
+
+       /**
+        * Checks if the first string ends with the second
+        *
+        * An empty $end is considered to be the end of every string, in line with startsWith().
+        *
+        * @see http://maettig.com/code/php/php-performance-benchmarks.php#endswith
+        * @param string $string
+        * @param string $end
+        *
+        * @return bool
+        */
+       public static function endsWith(string $string, string $end): bool
+       {
+               $length = strlen($end);
+
+               // An offset of -0 would make substr_compare() compare the whole string with ''
+               if ($length === 0) {
+                       return true;
+               }
+
+               // An ending that is longer than the string itself can't match
+               if ($length > strlen($string)) {
+                       return false;
+               }
+
+               return substr_compare($string, $end, -$length) === 0;
+       }
+
+       /**
+        * Returns the regular expression string to match URLs in a given text
+        *
+        * The pattern of linkRegEx() is put into capture group 1 and preceded by a negative
+        * lookbehind and a word boundary. The expression uses @ as delimiter and the modifiers
+        * x, i and u.
+        *
+        * @return string
+        */
+       public static function autoLinkRegEx(): string
+       {
+               return '@
+(?<![=\'\]"/]) # Not preceded by [, =, \', ], ", /
+\b
+(              # Capture 1: entire matched URL
+  ' . self::linkRegEx() . '
+)@xiu';
+       }
+
+       /**
+        * Returns the regular expression string to match a text that consists only of an HTTP URL
+        *
+        * The pattern of linkRegEx() is anchored to the start and the end of the subject.
+        *
+        * @return string
+        */
+       public static function onlyLinkRegEx(): string
+       {
+               $link = self::linkRegEx();
+
+               return '@^' . $link . '$@xiu';
+       }
+
+       /**
+        * Returns the pattern matching an http or https URL, without delimiters and modifiers
+        *
+        * The pattern contains whitespace and # comments, so the surrounding expression has to
+        * use the x modifier, as autoLinkRegEx() and onlyLinkRegEx() do.
+        *
+        * @return string
+        * @see https://daringfireball.net/2010/07/improved_regex_for_matching_urls
+        */
+       private static function linkRegEx(): string
+       {
+               return 'https?://                   # http or https protocol
+  (?:
+       [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’.]    # Domain can\'t start with a .
+       [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’]+    # Domain can\'t end with a .
+       \.
+       [^/\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]+/? # Followed by a slash
+  )
+  (?:                                       # One or more:
+       [^\s\xA0()<>]+                            # Run of non-space, non-()<>
+       |                                         #   or
+       \(([^\s\xA0()<>]+|(\([^\s()<>]+\)))*\)    # balanced parens, up to 2 levels
+       |                                                                         #   or
+       [^\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]         # not a space or one of these punct chars
+  )*';
+       }
+
+       /**
+        * Ensures a single path item doesn't contain any path-traversing characters
+        *
+        * Slashes, backslashes and the directory separator of the system are replaced by underscores.
+        *
+        * @param string $pathItem
+        *
+        * @see https://stackoverflow.com/a/46097713
+        * @return string
+        */
+       public static function sanitizeFilePathItem(string $pathItem): string
+       {
+               // DIRECTORY_SEPARATOR is listed in case it does not equal the standard values
+               $separators = ['/', '\\', DIRECTORY_SEPARATOR];
+
+               return str_replace($separators, '_', $pathItem);
+       }
+
+       /**
+        * Multi-byte safe implementation of substr_replace where $start and $length are character offset and count rather
+        * than byte offset and counts.
+        *
+        * Depends on mbstring, use default encoding.
+        *
+        * @param string   $string
+        * @param string   $replacement
+        * @param int      $start       Negative values count from the end of the string
+        * @param int|null $length      Negative values leave that many characters at the end untouched,
+        *                              null replaces everything from $start on
+        *
+        * @return string
+        * @see substr_replace()
+        */
+       public static function substringReplace(string $string, string $replacement, int $start, ?int $length = null): string
+       {
+               $string_length = mb_strlen($string);
+
+               $length = $length ?? $string_length;
+
+               // Bring $start into the range 0..$string_length
+               if ($start < 0) {
+                       $start = max(0, $string_length + $start);
+               } elseif ($start > $string_length) {
+                       $start = $string_length;
+               }
+
+               // Turn $length into a non-negative character count
+               if ($length < 0) {
+                       $length = max(0, $string_length - $start + $length);
+               } elseif ($length > $string_length) {
+                       $length = $string_length;
+               }
+
+               // The replaced part can't reach beyond the end of the string
+               if (($start + $length) > $string_length) {
+                       $length = $string_length - $start;
+               }
+
+               return mb_substr($string, 0, $start) . $replacement . mb_substr($string, $start + $length, $string_length - $start - $length);
+       }
+
+       /**
+        * Perform a custom function on a text after having escaped blocks matched by the provided regular expressions.
+        * Only full matches are used, capturing groups are ignored.
+        *
+        * To change the provided text, the callback function needs to return it and this function will return the modified
+        * version as well after having restored the escaped blocks.
+        *
+        * If the blocks can't be escaped, the callback receives the unchanged text. If they can't be restored,
+        * the result of the callback is returned as it is.
+        *
+        * @param string   $text
+        * @param string   $regex
+        * @param callable $callback
+        *
+        * @return string
+        * @throws \Exception if random_int() can't find a source of randomness
+        */
+       public static function performWithEscapedBlocks(string $text, string $regex, callable $callback): string
+       {
+               // Enables nested use
+               $executionId = random_int(intdiv(PHP_INT_MAX, 10), PHP_INT_MAX);
+
+               $blocks = [];
+
+               $return = preg_replace_callback($regex,
+                       function ($matches) use ($executionId, &$blocks) {
+                               $return = '«block-' . $executionId . '-' . count($blocks) . '»';
+
+                               $blocks[] = $matches[0];
+
+                               return $return;
+                       },
+                       $text
+               );
+
+               if (is_null($return)) {
+                       Logger::notice('Received null value from preg_replace_callback', ['text' => $text, 'regex' => $regex, 'blocks' => $blocks, 'executionId' => $executionId, 'callstack' => System::callstack(10)]);
+               }
+
+               $text = $callback($return ?? $text) ?? '';
+
+               // Restore code blocks
+               $restored = preg_replace_callback('/«block-' . $executionId . '-([0-9]+)»/iU',
+                       function ($matches) use ($blocks) {
+                               // The same integer key is used for the check and for the lookup
+                               $index = intval($matches[1]);
+
+                               return $blocks[$index] ?? $matches[0];
+                       },
+                       $text
+               );
+
+               // preg_replace_callback() returns null on errors, which isn't a valid return value here
+               return $restored ?? $text;
+       }
+
+       /**
+        * This function converts a PHP's shorthand notation string for file sizes into an integer number of total bytes.
+        * For example: The string for shorthand notation of '2M' (which is 2,097,152 Bytes) is converted to 2097152
+        *
+        * The units K, M and G are recognized case-insensitively. An empty string or a non-numeric
+        * amount yields 0, any other last character is dropped without applying a factor.
+        *
+        * @see https://www.php.net/manual/en/faq.using.php#faq.using.shorthandbytes
+        * @param string $shorthand
+        * @return int
+        */
+       public static function getBytesFromShorthand(string $shorthand): int
+       {
+               $shorthand = trim($shorthand);
+
+               // An empty value has neither an amount nor a unit to read
+               if ($shorthand === '') {
+                       return 0;
+               }
+
+               if (is_numeric($shorthand)) {
+                       return (int)$shorthand;
+               }
+
+               $last   = strtolower(substr($shorthand, -1));
+               $amount = trim(substr($shorthand, 0, -1));
+
+               // A non-numeric amount can't be multiplied
+               $bytes = is_numeric($amount) ? $amount + 0 : 0;
+
+               // The missing breaks are intended: every unit applies the factors of the smaller units as well
+               switch ($last) {
+                       case 'g':
+                               $bytes *= 1024;
+                       case 'm':
+                               $bytes *= 1024;
+                       case 'k':
+                               $bytes *= 1024;
+               }
+
+               return (int)$bytes;
+       }
+
  }