From d493946ba4411aab1d6c522378a703f29bfbcba7 Mon Sep 17 00:00:00 2001 From: gudzpoz Date: Thu, 16 Nov 2023 13:31:31 +0800 Subject: [PATCH] Allow using punctuation chars as smiley delimiters --- src/Content/Smilies.php | 94 ++++++++++++------- src/Factory/Api/Mastodon/Status.php | 8 +- src/Protocol/ActivityPub/Transmitter.php | 6 +- tests/datasets/api.fixture.php | 2 +- tests/src/Content/SmiliesTest.php | 33 +++++-- tests/src/Factory/Api/Mastodon/StatusTest.php | 2 +- .../Protocol/ActivityPub/TransmitterTest.php | 2 +- 7 files changed, 94 insertions(+), 53 deletions(-) diff --git a/src/Content/Smilies.php b/src/Content/Smilies.php index 1aeab5b804..9c357a9eb2 100644 --- a/src/Content/Smilies.php +++ b/src/Content/Smilies.php @@ -197,11 +197,11 @@ class Smilies * @return array with smilie codes (colon included) as the keys, their image urls as values; * the normalized string is put under the '' (empty string) key */ - public static function extractUsedSmilies(string $text): array + public static function extractUsedSmilies(string $text, string &$normalized = null): array { $emojis = []; - $emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) { + $normalized = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) { return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) { if (strpos($text, '[nosmile]') !== false || self::noSmilies()) { return $text; @@ -236,43 +236,69 @@ class Smilies */ private static function performForEachWordMatch(array $words, string $subject, callable $callback): string { - $offset = 0; - $result = ''; - $processed = 0; - // Learned from PHP's strtr implementation - // Should probably improve performance once JIT-compiled - $length_bitset = 0; - $ord_bitset = 0; + $ord1_bitset = 0; + $ord2_bitset = 0; + $prefixes = []; foreach ($words as $word => $_) { - $length = strlen($word); - if ($length <= 31) { - $length_bitset |= 1 << 
$length; + if (strlen($word) < 2 || !ctype_graph($word)) { + continue; + } + $ord1 = ord($word); + $ord2 = ord($word[1]); + $ord1_bitset |= 1 << ($ord1 & 31); + $ord2_bitset |= 1 << ($ord2 & 31); + if (!array_key_exists($word[0], $prefixes)) { + $prefixes[$word[0]] = []; } - $ord = ord($word); - $ord_bitset |= 1 << ($ord & 31); + $prefixes[$word[0]][] = $word; } - while ($offset < strlen($subject) && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) { - [$whitespaces, $next] = $matches[0]; - $word = substr($subject, $offset, $next - $offset); - - $shift = strlen($word); - $ord = ord($word); - if (($shift > 31 || ($length_bitset & (1 << $shift))) - && ($ord_bitset & (1 << ($ord & 31))) - && array_key_exists($word, $words)) { - $result .= substr($subject, $processed, $offset - $processed); - $result .= call_user_func($callback, $word, $words[$word]); - $processed = $offset + strlen($word); + $result = ''; + $processed = 0; + $s_start = 0; // Segment start + // No spaces are allowed in smilies, so they can serve as delimiters. + // Splitting by some delimiters may not be necessary though? + while (true) { + if ($s_start >= strlen($subject)) { + $result .= substr($subject, $processed); + break; } - $offset = $next + strlen($whitespaces); - } - $word = substr($subject, $offset); - if (array_key_exists($word, $words)) { - $result .= substr($subject, $processed, $offset - $processed); - $result .= call_user_func($callback, $word, $words[$word]); - } else { - $result .= substr($subject, $processed); + if (preg_match('/\s+?(?=\S|$)/', $subject, $match, PREG_OFFSET_CAPTURE, $s_start)) { + [$whitespaces, $s_end] = $match[0]; + } else { + $s_end = strlen($subject); + $whitespaces = ''; + } + $s_length = $s_end - $s_start; + if ($s_length > 1) { + $segment = substr($subject, $s_start, $s_length); + // Find possible starting points for smilies. + // For built-in smilies, the two bitsets should make attempts quite efficient. 
+ // However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:", + // if the user adds more smilies (with addons), the second bitset may eventually become useless. + for ($i = 0; $i < $s_length - 1; $i++) { + $c = $segment[$i]; + $d = $segment[$i + 1]; + if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) { + foreach ($prefixes[$c] as $word) { + $wlength = strlen($word); + if ($wlength <= $s_length - $i && substr($segment, $i, $wlength) === $word) { + // Check for boundaries + if (($i === 0 || ctype_space($segment[$i - 1]) || ctype_punct($segment[$i - 1])) + && ($i + $wlength >= $s_length || ctype_space($segment[$i + $wlength]) || ctype_punct($segment[$i + $wlength]))) { + $result .= substr($subject, $processed, $s_start - $processed + $i); + $result .= call_user_func($callback, $word, $words[$word]); + $i += $wlength; + $processed = $s_start + $i; + $i--; + break; + } + } + } + } + } + } + $s_start = $s_end + strlen($whitespaces); } return $result; } diff --git a/src/Factory/Api/Mastodon/Status.php b/src/Factory/Api/Mastodon/Status.php index 6d45b4d9fe..5cd90ecb5f 100644 --- a/src/Factory/Api/Mastodon/Status.php +++ b/src/Factory/Api/Mastodon/Status.php @@ -290,14 +290,12 @@ class Status extends BaseFactory $emojis = null; if (DI::baseUrl()->isLocalUrl($item['uri'])) { - $used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body']); - // $used_smilies contains normalized texts + $used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body'], $normalized); if ($item['raw-body']) { - $item['raw-body'] = $used_smilies['']; + $item['raw-body'] = $normalized; } elseif ($item['body']) { - $item['body'] = $used_smilies['']; + $item['body'] = $normalized; } - unset($used_smilies['']); $emojis = $this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true); } else { if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", 
$item['body'] ?: $item['raw-body'], $matches)) { diff --git a/src/Protocol/ActivityPub/Transmitter.php b/src/Protocol/ActivityPub/Transmitter.php index 56724e22d2..9d0d998f9d 100644 --- a/src/Protocol/ActivityPub/Transmitter.php +++ b/src/Protocol/ActivityPub/Transmitter.php @@ -1514,11 +1514,9 @@ class Transmitter * @param string $text Text containing tags like :tag: * @return string normalized text */ - private static function addEmojiTags(array &$tags, string $text) + private static function addEmojiTags(array &$tags, string $text): string { - $emojis = Smilies::extractUsedSmilies($text); - $normalized = $emojis['']; - unset($emojis['']); + $emojis = Smilies::extractUsedSmilies($text, $normalized); foreach ($emojis as $name => $url) { $tags[] = [ 'type' => 'Emoji', diff --git a/tests/datasets/api.fixture.php b/tests/datasets/api.fixture.php index 876827d748..2bf38a5e91 100644 --- a/tests/datasets/api.fixture.php +++ b/tests/datasets/api.fixture.php @@ -371,7 +371,7 @@ return [ [ 'uri-id' => 100, 'title' => 'item_title', - 'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[', + 'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ <3', 'plink' => 'https://friendica.local/post/100', ], ], diff --git a/tests/src/Content/SmiliesTest.php b/tests/src/Content/SmiliesTest.php index 67ba313fe6..e41e59ca82 100644 --- a/tests/src/Content/SmiliesTest.php +++ b/tests/src/Content/SmiliesTest.php @@ -147,7 +147,7 @@ class SmiliesTest extends FixtureTest public function dataReplace(): array { - return [ + $data = [ 'simple-1' => [ 'expected' => 'alt=":-p"', 'body' => ':-p', @@ -165,7 +165,7 @@ class SmiliesTest extends FixtureTest 'body' => '~friendicaca', ], 'symbol-boundary-1' => [ - 'expected' => '(:-p)', + 'expected' => 'alt=":-p"', 'body' => '(:-p)', ], 'hearts-1' => [ @@ -185,6 +185,19 @@ class SmiliesTest extends FixtureTest 'body' => '(3<33)', ], ]; + foreach ([':-[', ':-D', 'o.O'] as $emoji) { + foreach (['A', '_', ':', '-'] as $prefix) { + foreach 
(['', ' ', 'A', ':', '-'] as $suffix) { + $no_smile = ($prefix !== '' && ctype_alnum($prefix)) || ($suffix !== '' && ctype_alnum($suffix)); + $s = $prefix . $emoji . $suffix; + $data[] = [ + 'expected' => $no_smile ? $s : 'alt="' . $emoji . '"', + 'body' => $s, + ]; + } + } + } + return $data; } /** @@ -202,6 +215,11 @@ class SmiliesTest extends FixtureTest public function dataExtractUsedSmilies(): array { return [ + 'symbols' => [ + 'expected' => ['p', 'heart', 'embarrassed', 'kiss'], + 'body' => ':-p <3 ":-[:-"', + 'normalized' => ':p: :heart: ":embarrassed::kiss:', + ], 'single-smiley' => [ 'expected' => ['like'], 'body' => ':like', @@ -239,11 +257,12 @@ class SmiliesTest extends FixtureTest */ public function testExtractUsedSmilies(array $expected, string $body, string $normalized) { - $extracted = Smilies::extractUsedSmilies($body); - $this->assertEquals($normalized, $extracted['']); - foreach ($expected as $shortcode) { - $this->assertArrayHasKey($shortcode, $extracted); + $extracted = Smilies::extractUsedSmilies($body, $converted); + $expected = array_fill_keys($expected, true); + $this->assertEquals($normalized, $converted); + foreach (array_keys($extracted) as $shortcode) { + $this->assertArrayHasKey($shortcode, $expected); } - $this->assertEquals(count($expected), count($extracted) - 1); + $this->assertEquals(count($expected), count($extracted)); } } diff --git a/tests/src/Factory/Api/Mastodon/StatusTest.php b/tests/src/Factory/Api/Mastodon/StatusTest.php index 7593d9a32b..d150d85574 100644 --- a/tests/src/Factory/Api/Mastodon/StatusTest.php +++ b/tests/src/Factory/Api/Mastodon/StatusTest.php @@ -50,7 +50,7 @@ class StatusTest extends FixtureTest $post = Post::selectFirst([], ['id' => 14]); $this->assertNotNull($post); $result = $this->status->createFromUriId($post['uri-id'])->toArray(); - $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed:', $result['content']); + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: ❤', 
$result['content']); $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); $this->assertEquals(count($emojis), count($result['emojis'])); foreach ($result['emojis'] as $emoji) { diff --git a/tests/src/Protocol/ActivityPub/TransmitterTest.php b/tests/src/Protocol/ActivityPub/TransmitterTest.php index c7a94bc598..49b51da4b9 100644 --- a/tests/src/Protocol/ActivityPub/TransmitterTest.php +++ b/tests/src/Protocol/ActivityPub/TransmitterTest.php @@ -42,7 +42,7 @@ class TransmitterTest extends FixtureTest $note = Transmitter::createNote($post); $this->assertNotNull($note); - $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed:', $note['content']); + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed: ❤', $note['content']); $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); $this->assertEquals(count($emojis), count($note['tag'])); foreach ($note['tag'] as $emoji) { -- 2.39.5