]> git.mxchange.org Git - friendica.git/blobdiff - src/Content/Smilies.php
Remove DI dependency in Object\Api\Mastodon\Instance
[friendica.git] / src / Content / Smilies.php
index 1aeab5b804fb5bdfe62fbfdc815781064735e3d0..29752701f0fe2491bf27f1f10b0776d10bd7e4b8 100644 (file)
@@ -153,43 +153,6 @@ class Smilies
                return $params;
        }
 
-       /**
-        * Normalizes smiley shortcodes into texts with no special symbols.
-        *
-        * @return array
-        *    'texts' => smilie shortcut
-        *    'icons' => icon url or an empty string
-        *    'norms' => normalized shortcut
-        */
-       public static function getNormalizedList(): array
-       {
-               $smilies = self::getList();
-               $norms = [];
-               $icons = $smilies['icons'];
-               foreach ($smilies['texts'] as $i => $shortcode) {
-                       // Extract urls
-                       $icon = $icons[$i];
-                       if (preg_match('/src="(.+?)"/', $icon, $match)) {
-                               $icon = $match[1];
-                       } else {
-                               $icon = '';
-                       }
-                       $icons[$i] = $icon;
-
-                       // Normalize name
-                       $norm = preg_replace('/[\s\-:#~]/', '', $shortcode);
-                       if (ctype_alnum($norm)) {
-                               $norms[] = $norm;
-                       } elseif (preg_match('#/smiley-(\w+)\.gif#', $icon, $match)) {
-                               $norms[] = $match[1];
-                       } else {
-                               $norms[] = 'smiley' . $i;
-                       }
-               }
-               $smilies['norms'] = $norms;
-               return $smilies;
-       }
-
        /**
         * Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages.
         *
@@ -197,27 +160,45 @@ class Smilies
         * @return array with smilie codes (colon included) as the keys, their image urls as values;
         *               the normalized string is put under the '' (empty string) key
         */
-       public static function extractUsedSmilies(string $text): array
+       public static function extractUsedSmilies(string $text, string &$normalized = null): array
        {
                $emojis = [];
 
-               $emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
+               $normalized = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
                        return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) {
                                if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
                                        return $text;
                                }
-                               $smilies = self::getNormalizedList();
-                               $normalized = array_combine($smilies['texts'], $smilies['norms']);
+                               $smilies = self::getList();
+                               $normalized = [];
                                return self::performForEachWordMatch(
                                        array_combine($smilies['texts'], $smilies['icons']),
                                        $text,
                                        function (string $name, string $image) use($normalized, &$emojis) {
-                                               $name = $normalized[$name];
+                                               if (array_key_exists($name, $normalized)) {
+                                                       return $normalized[$name];
+                                               }
                                                if (preg_match('/src="(.+?)"/', $image, $match)) {
-                                                       $image = $match[1];
-                                                       $emojis[$name] = $image;
+                                                       $url = $match[1];
+                                                       // Image smilies, which should be normalized instead of being embedded for some protocols like ActivityPub.
+                                                       // Normalize name
+                                                       $norm = preg_replace('/[\s\-:#~]/', '', $name);
+                                                       if (!ctype_alnum($norm)) {
+                                                               if (preg_match('#/smiley-(\w+)\.gif#', $url, $match)) {
+                                                                       $norm = $match[1];
+                                                               } else {
+                                                                       $norm = 'smiley' . count($normalized);
+                                                               }
+                                                       }
+                                                       $shortcode = ':' . $norm . ':';
+                                                       $normalized[$name] = $shortcode;
+                                                       $emojis[$norm] = $url;
+                                                       return $shortcode;
+                                               } else {
+                                                       $normalized[$name] = $image;
+                                                       // Probably text-substitution smilies (e.g., Unicode ones).
+                                                       return $image;
                                                }
-                                               return ':' . $name . ':';
                                        },
                                );
                        });
@@ -236,42 +217,57 @@ class Smilies
         */
        private static function performForEachWordMatch(array $words, string $subject, callable $callback): string
        {
-               $offset = 0;
-               $result = '';
-               $processed = 0;
-               // Learned from PHP's strtr implementation
-               // Should probably improve performance once JIT-compiled
-               $length_bitset = 0;
-               $ord_bitset = 0;
+               $ord1_bitset = 0;
+               $ord2_bitset = 0;
+               $prefixes = [];
                foreach ($words as $word => $_) {
-                       $length = strlen($word);
-                       if ($length <= 31) {
-                               $length_bitset |= 1 << $length;
+                       if (strlen($word) < 2) {
+                               continue;
+                       }
+                       $ord1 = ord($word[0]);
+                       $ord2 = ord($word[1]);
+                       // A smiley shortcode must not begin or end with whitespaces.
+                       if (ctype_space($word[0]) || ctype_space($word[strlen($word) - 1])) {
+                               continue;
+                       }
+                       $ord1_bitset |= 1 << ($ord1 & 31);
+                       $ord2_bitset |= 1 << ($ord2 & 31);
+                       if (!array_key_exists($word[0], $prefixes)) {
+                               $prefixes[$word[0]] = [];
                        }
-                       $ord = ord($word);
-                       $ord_bitset |= 1 << ($ord & 31);
+                       $prefixes[$word[0]][] = $word;
                }
 
-               while ($offset < strlen($subject) && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) {
-                       [$whitespaces, $next] = $matches[0];
-                       $word = substr($subject, $offset, $next - $offset);
-
-                       $shift = strlen($word);
-                       $ord = ord($word);
-                       if (($shift > 31 || ($length_bitset & (1 << $shift)))
-                               && ($ord_bitset & (1 << ($ord & 31)))
-                               && array_key_exists($word, $words)) {
-                               $result .= substr($subject, $processed, $offset - $processed);
-                               $result .= call_user_func($callback, $word, $words[$word]);
-                               $processed = $offset + strlen($word);
+               $slength = strlen($subject);
+               $result = '';
+               // $processed is used to delay string concatenation since appending a char every loop is inefficient.
+               $processed = 0;
+               // Find possible starting points for smilies.
+               // For built-in smilies, the two bitsets should make attempts quite efficient.
+               // However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:",
+               // if the user adds more smilies (with addons), the second bitset may eventually become useless.
+               for ($i = 0; $i < $slength - 1; $i++) {
+                       $c = $subject[$i];
+                       $d = $subject[$i + 1];
+                       if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) {
+                               foreach ($prefixes[$c] as $word) {
+                                       $wlength = strlen($word);
+                                       if (substr($subject, $i, $wlength) === $word) {
+                                               // Check for boundaries
+                                               if (($i === 0 || ctype_space($subject[$i - 1]) || ctype_punct($subject[$i - 1]))
+                                                       && ($i + $wlength >= $slength || ctype_space($subject[$i + $wlength]) || ctype_punct($subject[$i + $wlength]))) {
+                                                       $result .= substr($subject, $processed, $i - $processed);
+                                                       $result .= call_user_func($callback, $word, $words[$word]);
+                                                       $i += $wlength;
+                                                       $processed = $i;
+                                                       $i--;
+                                                       break;
+                                               }
+                                       }
+                               }
                        }
-                       $offset = $next + strlen($whitespaces);
                }
-               $word = substr($subject, $offset);
-               if (array_key_exists($word, $words)) {
-                       $result .= substr($subject, $processed, $offset - $processed);
-                       $result .= call_user_func($callback, $word, $words[$word]);
-               } else {
+               if ($processed < $slength) {
                        $result .= substr($subject, $processed);
                }
                return $result;