From 37188c76b8401d8cf219a1156004afd40fceb9c1 Mon Sep 17 00:00:00 2001 From: gudzpoz Date: Sat, 25 Nov 2023 23:29:39 +0800 Subject: [PATCH] Fix substituting smilies and smilies containing whitespaces --- src/Content/Smilies.php | 138 +++++++----------- tests/Util/SmileyWhitespaceAddon.php | 36 +++++ tests/datasets/api.fixture.php | 2 +- tests/src/Content/SmiliesTest.php | 49 +++++++ tests/src/Factory/Api/Mastodon/StatusTest.php | 10 +- .../Protocol/ActivityPub/TransmitterTest.php | 8 +- 6 files changed, 153 insertions(+), 90 deletions(-) create mode 100644 tests/Util/SmileyWhitespaceAddon.php diff --git a/src/Content/Smilies.php b/src/Content/Smilies.php index 9c357a9eb2..01ac832eac 100644 --- a/src/Content/Smilies.php +++ b/src/Content/Smilies.php @@ -153,43 +153,6 @@ class Smilies return $params; } - /** - * Normalizes smiley shortcodes into texts with no special symbols. - * - * @return array - * 'texts' => smilie shortcut - * 'icons' => icon url or an empty string - * 'norms' => normalized shortcut - */ - public static function getNormalizedList(): array - { - $smilies = self::getList(); - $norms = []; - $icons = $smilies['icons']; - foreach ($smilies['texts'] as $i => $shortcode) { - // Extract urls - $icon = $icons[$i]; - if (preg_match('/src="(.+?)"/', $icon, $match)) { - $icon = $match[1]; - } else { - $icon = ''; - } - $icons[$i] = $icon; - - // Normalize name - $norm = preg_replace('/[\s\-:#~]/', '', $shortcode); - if (ctype_alnum($norm)) { - $norms[] = $norm; - } elseif (preg_match('#/smiley-(\w+)\.gif#', $icon, $match)) { - $norms[] = $match[1]; - } else { - $norms[] = 'smiley' . $i; - } - } - $smilies['norms'] = $norms; - return $smilies; - } - /** * Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages. * @@ -206,18 +169,36 @@ class Smilies if (strpos($text, '[nosmile]') !== false || self::noSmilies()) { return $text; } - $smilies = self::getNormalizedList(); - $normalized = array_combine($smilies['texts'], $smilies['norms']); + $smilies = self::getList(); + $normalized = []; return self::performForEachWordMatch( array_combine($smilies['texts'], $smilies['icons']), $text, function (string $name, string $image) use($normalized, &$emojis) { - $name = $normalized[$name]; + if (array_key_exists($name, $normalized)) { + return $normalized[$name]; + } if (preg_match('/src="(.+?)"/', $image, $match)) { - $image = $match[1]; - $emojis[$name] = $image; + $url = $match[1]; + // Image smilies, which should be normalized instead of being embedded for some protocols like ActivityPub. + // Normalize name + $norm = preg_replace('/[\s\-:#~]/', '', $name); + if (!ctype_alnum($norm)) { + if (preg_match('#/smiley-(\w+)\.gif#', $url, $match)) { + $norm = $match[1]; + } else { + $norm = 'smiley' . count($normalized); + } + } + $shortcode = ':' . $norm . ':'; + $normalized[$name] = $shortcode; + $emojis[$norm] = $url; + return $shortcode; + } else { + $normalized[$name] = $image; + // Probably text-substitution smilies (e.g., Unicode ones). + return $image; } - return ':' . $name . ':'; }, ); }); @@ -240,11 +221,15 @@ class Smilies $ord2_bitset = 0; $prefixes = []; foreach ($words as $word => $_) { - if (strlen($word) < 2 || !ctype_graph($word)) { + if (strlen($word) < 2) { continue; } $ord1 = ord($word); $ord2 = ord($word[1]); + // A smiley shortcode must not begin or end with whitespaces. + if (ctype_space($ord1) || ctype_space($word[strlen($word) - 1])) { + continue; + } $ord1_bitset |= 1 << ($ord1 & 31); $ord2_bitset |= 1 << ($ord2 & 31); if (!array_key_exists($word[0], $prefixes)) { @@ -253,52 +238,37 @@ class Smilies $prefixes[$word[0]][] = $word; } + $slength = strlen($subject); $result = ''; + // $processed is used to delay string concatenation since appending a char every loop is inefficient. $processed = 0; - $s_start = 0; // Segment start - // No spaces are allowed in smilies, so they can serve as delimiters. - // Splitting by some delimiters may not necessary though? - while (true) { - if ($s_start >= strlen($subject)) { - $result .= substr($subject, $processed); - break; - } - if (preg_match('/\s+?(?=\S|$)/', $subject, $match, PREG_OFFSET_CAPTURE, $s_start)) { - [$whitespaces, $s_end] = $match[0]; - } else { - $s_end = strlen($subject); - $whitespaces = ''; - } - $s_length = $s_end - $s_start; - if ($s_length > 1) { - $segment = substr($subject, $s_start, $s_length); - // Find possible starting points for smilies. - // For built-in smilies, the two bitsets should make attempts quite efficient. - // However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:", - // if the user adds more smilies (with addons), the second bitset may eventually become useless. - for ($i = 0; $i < $s_length - 1; $i++) { - $c = $segment[$i]; - $d = $segment[$i + 1]; - if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) { - foreach ($prefixes[$c] as $word) { - $wlength = strlen($word); - if ($wlength <= $s_length - $i && substr($segment, $i, $wlength) === $word) { - // Check for boundaries - if (($i === 0 || ctype_space($segment[$i - 1]) || ctype_punct($segment[$i - 1])) - && ($i + $wlength >= $s_length || ctype_space($segment[$i + $wlength]) || ctype_punct($segment[$i + $wlength]))) { - $result .= substr($subject, $processed, $s_start - $processed + $i); - $result .= call_user_func($callback, $word, $words[$word]); - $i += $wlength; - $processed = $s_start + $i; - $i--; - break; - } - } + // Find possible starting points for smilies. + // For built-in smilies, the two bitsets should make attempts quite efficient. + // However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:", + // if the user adds more smilies (with addons), the second bitset may eventually become useless. + for ($i = 0; $i < $slength - 1; $i++) { + $c = $subject[$i]; + $d = $subject[$i + 1]; + if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) { + foreach ($prefixes[$c] as $word) { + $wlength = strlen($word); + if (substr($subject, $i, $wlength) === $word) { + // Check for boundaries + if (($i === 0 || ctype_space($subject[$i - 1]) || ctype_punct($subject[$i - 1])) + && ($i + $wlength >= $slength || ctype_space($subject[$i + $wlength]) || ctype_punct($subject[$i + $wlength]))) { + $result .= substr($subject, $processed, $i - $processed); + $result .= call_user_func($callback, $word, $words[$word]); + $i += $wlength; + $processed = $i; + $i--; + break; } } } } - $s_start = $s_end + strlen($whitespaces); + } + if ($processed < $slength) { + $result .= substr($subject, $processed); } return $result; } diff --git a/tests/Util/SmileyWhitespaceAddon.php b/tests/Util/SmileyWhitespaceAddon.php new file mode 100644 index 0000000000..5277d3d3fa --- /dev/null +++ b/tests/Util/SmileyWhitespaceAddon.php @@ -0,0 +1,36 @@ +. + * + */ + +use Friendica\Content\Smilies; + +function add_test_unicode_smilies(array &$b) +{ + // String-substitution smilies + // - no whitespaces + Smilies::add($b, '⽕', '🔥'); + // - with whitespaces + Smilies::add($b, ':hugging face:', '🤗'); + // - with multiple whitespaces + Smilies::add($b, ':face with hand over mouth:', '🤭'); + // Image-based smilies + // - with whitespaces + Smilies::add($b, ':smiley heart 333:', 'smiley-heart'); +} diff --git a/tests/datasets/api.fixture.php b/tests/datasets/api.fixture.php index 2bf38a5e91..1d2bb5f883 100644 --- a/tests/datasets/api.fixture.php +++ b/tests/datasets/api.fixture.php @@ -371,7 +371,7 @@ return [ [ 'uri-id' => 100, 'title' => 'item_title', - 'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ <3', + 'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ :hugging face: <3 :smiley heart 333: ⽕', 'plink' => 'https://friendica.local/post/100', ], ], diff --git a/tests/src/Content/SmiliesTest.php b/tests/src/Content/SmiliesTest.php index e41e59ca82..820e378fb3 100644 --- a/tests/src/Content/SmiliesTest.php +++ b/tests/src/Content/SmiliesTest.php @@ -26,6 +26,7 @@ namespace Friendica\Test\src\Content; use Friendica\Content\Smilies; +use Friendica\Core\Hook; use Friendica\DI; use Friendica\Network\HTTPException\InternalServerErrorException; use Friendica\Test\FixtureTest; @@ -37,6 +38,9 @@ class SmiliesTest extends FixtureTest parent::setUp(); DI::config()->set('system', 'no_smilies', false); + + Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies'); + Hook::loadHooks(); } public function dataLinks() @@ -184,6 +188,26 @@ class SmiliesTest extends FixtureTest 'expected' => '(3<33)', 'body' => '(3<33)', ], + 'space' => [ + 'expected' => 'alt="smiley-heart"', + 'body' => ':smiley heart 333:', + ], + 'substitution-1' => [ + 'expected' => '🔥', + 'body' => '⽕', + ], + 'substitution-2' => [ + 'expected' => '🤗', + 'body' => ':hugging face:', + ], + 'substitution-3' => [ + 'expected' => '🤭', + 'body' => ':face with hand over mouth:', + ], + 'mixed' => [ + 'expected' => '🔥 🤭 invalid:hugging face: 🤗', + 'body' => '⽕ :face with hand over mouth: invalid:hugging face: :hugging face:', + ], ]; foreach ([':-[', ':-D', 'o.O'] as $emoji) { foreach (['A', '_', ':', '-'] as $prefix) { @@ -245,6 +269,31 @@ class SmiliesTest extends FixtureTest 'body' => '~friendica', 'normalized' => ':friendica:' ], + 'space' => [ + 'expected' => ['smileyheart333'], + 'body' => ':smiley heart 333:', + 'normalized' => ':smileyheart333:' + ], + 'substitution-1' => [ + 'expected' => [], + 'body' => '⽕', + 'normalized' => '🔥', + ], + 'substitution-2' => [ + 'expected' => [], + 'body' => ':hugging face:', + 'normalized' => '🤗', + ], + 'substitution-3' => [ + 'expected' => [], + 'body' => ':face with hand over mouth:', + 'normalized' => '🤭', + ], + 'mixed' => [ + 'expected' => [], + 'body' => '⽕ :face with hand over mouth: invalid:hugging face: :hugging face:', + 'normalized' => '🔥 🤭 invalid:hugging face: 🤗', + ], ]; } diff --git a/tests/src/Factory/Api/Mastodon/StatusTest.php b/tests/src/Factory/Api/Mastodon/StatusTest.php index d150d85574..df702fac85 100644 --- a/tests/src/Factory/Api/Mastodon/StatusTest.php +++ b/tests/src/Factory/Api/Mastodon/StatusTest.php @@ -21,8 +21,9 @@ namespace Friendica\Test\src\Factory\Api\Mastodon; -use Friendica\Model\Post; +use Friendica\Core\Hook; use Friendica\DI; +use Friendica\Model\Post; use Friendica\Test\FixtureTest; class StatusTest extends FixtureTest @@ -35,6 +36,9 @@ class StatusTest extends FixtureTest DI::config()->set('system', 'no_smilies', false); $this->status = DI::mstdnStatus(); + + Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies'); + Hook::loadHooks(); } public function testSimpleStatus() @@ -50,8 +54,8 @@ class StatusTest extends FixtureTest $post = Post::selectFirst([], ['id' => 14]); $this->assertNotNull($post); $result = $this->status->createFromUriId($post['uri-id'])->toArray(); - $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: ❤', $result['content']); - $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: 🤗 ❤ :smileyheart333: 🔥', $result['content']); + $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed', 'smileyheart333'], true); $this->assertEquals(count($emojis), count($result['emojis'])); foreach ($result['emojis'] as $emoji) { $this->assertTrue(array_key_exists($emoji['shortcode'], $emojis)); diff --git a/tests/src/Protocol/ActivityPub/TransmitterTest.php b/tests/src/Protocol/ActivityPub/TransmitterTest.php index 49b51da4b9..3eb9cb020e 100644 --- a/tests/src/Protocol/ActivityPub/TransmitterTest.php +++ b/tests/src/Protocol/ActivityPub/TransmitterTest.php @@ -21,6 +21,7 @@ namespace Friendica\Test\src\Protocol\ActivityPub; +use Friendica\Core\Hook; use Friendica\DI; use Friendica\Model\Post; use Friendica\Protocol\ActivityPub\Transmitter; @@ -33,6 +34,9 @@ class TransmitterTest extends FixtureTest parent::setUp(); DI::config()->set('system', 'no_smilies', false); + + Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies'); + Hook::loadHooks(); } public function testEmojiPost() @@ -42,8 +46,8 @@ class TransmitterTest extends FixtureTest $note = Transmitter::createNote($post); $this->assertNotNull($note); - $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: ❤', $note['content']); - $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: 🤗 ❤ :smileyheart333: 🔥', $note['content']); + $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed', 'smileyheart333'], true); $this->assertEquals(count($emojis), count($note['tag'])); foreach ($note['tag'] as $emoji) { $this->assertTrue(array_key_exists($emoji['name'], $emojis)); -- 2.39.5