From d45e9d6af23b84c4ae0ddf96c112a0a01692dd8e Mon Sep 17 00:00:00 2001 From: gudzpoz Date: Wed, 15 Nov 2023 23:53:38 +0800 Subject: [PATCH] Require whitespace around smilies and normalize federating text --- src/Content/Smilies.php | 165 ++++++++++++++---- src/Factory/Api/Mastodon/Emoji.php | 26 ++- src/Factory/Api/Mastodon/Status.php | 11 +- src/Protocol/ActivityPub/Transmitter.php | 16 +- tests/datasets/api.fixture.php | 33 ++++ tests/src/Content/SmiliesTest.php | 103 +++++++++++ tests/src/Factory/Api/Mastodon/EmojiTest.php | 45 +++++ tests/src/Factory/Api/Mastodon/StatusTest.php | 61 +++++++ .../Protocol/ActivityPub/TransmitterTest.php | 53 ++++++ 9 files changed, 457 insertions(+), 56 deletions(-) create mode 100644 tests/src/Factory/Api/Mastodon/EmojiTest.php create mode 100644 tests/src/Factory/Api/Mastodon/StatusTest.php create mode 100644 tests/src/Protocol/ActivityPub/TransmitterTest.php diff --git a/src/Content/Smilies.php b/src/Content/Smilies.php index 760bfbce9e..1aeab5b804 100644 --- a/src/Content/Smilies.php +++ b/src/Content/Smilies.php @@ -21,6 +21,7 @@ namespace Friendica\Content; +use Friendica\Content\Text\BBCode; use Friendica\Core\Hook; use Friendica\DI; use Friendica\Util\Strings; @@ -67,7 +68,7 @@ class Smilies */ public static function getList(): array { - $texts = [ + $texts = [ '<3', '</3', '<\\3', @@ -153,34 +154,129 @@ class Smilies } /** - * Finds all used smilies (like :heart: or :p) in the provided text. + * Normalizes smiley shortcodes into texts with no special symbols. 
* - * @param string $text that might contain smilie usages (denoted by a starting colon) - * @param bool $extract_url whether to further extract image urls - * @return array with smilie codes (colon included) as the keys, the smilie images as values + * @return array + * 'texts' => smilie shortcut + * 'icons' => icon url or an empty string + * 'norms' => normalized shortcut */ - public static function extractUsedSmilies(string $text, bool $extract_url = false): array + public static function getNormalizedList(): array { - $emojis = []; - $smilies = self::getList(); + $norms = []; $icons = $smilies['icons']; - foreach ($smilies['texts'] as $i => $name) { - if (strstr($text, $name)) { - $image = $icons[$i]; - if ($extract_url) { - if (preg_match('/src="(.+?)"/', $image, $match)) { - $image = $match[1]; - } else { - continue; - } - } - $emojis[$name] = $image; + foreach ($smilies['texts'] as $i => $shortcode) { + // Extract urls + $icon = $icons[$i]; + if (preg_match('/src="(.+?)"/', $icon, $match)) { + $icon = $match[1]; + } else { + $icon = ''; + } + $icons[$i] = $icon; + + // Normalize name + $norm = preg_replace('/[\s\-:#~]/', '', $shortcode); + if (ctype_alnum($norm)) { + $norms[] = $norm; + } elseif (preg_match('#/smiley-(\w+)\.gif#', $icon, $match)) { + $norms[] = $match[1]; + } else { + $norms[] = 'smiley' . $i; } } + $smilies['norms'] = $norms; + return $smilies; + } + + /** + * Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages. 
+ * + * @param string $text that might contain smiley usages + * @return array with normalized smilie codes (without the surrounding colons) as the keys, their image urls as values; + * the normalized string is put under the '' (empty string) key + */ + public static function extractUsedSmilies(string $text): array + { + $emojis = []; + + $emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) { + return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) { + if (strpos($text, '[nosmile]') !== false || self::noSmilies()) { + return $text; + } + $smilies = self::getNormalizedList(); + $normalized = array_combine($smilies['texts'], $smilies['norms']); + return self::performForEachWordMatch( + array_combine($smilies['texts'], $smilies['icons']), + $text, + function (string $name, string $image) use($normalized, &$emojis) { + $name = $normalized[$name]; + if (preg_match('/src="(.+?)"/', $image, $match)) { + $image = $match[1]; + $emojis[$name] = $image; + } + return ':' . $name . ':'; + }, + ); + }); + }); + return $emojis; } + /** + * Similar to strtr but matches only whole words and replaces texts with $callback. 
+ * + * @param array $words + * @param string $subject + * @param callable $callback ($word, $value) + * @return string + */ + private static function performForEachWordMatch(array $words, string $subject, callable $callback): string + { + $offset = 0; + $result = ''; + $processed = 0; + // Learned from PHP's strtr implementation + // Should probably improve performance once JIT-compiled + $length_bitset = 0; + $ord_bitset = 0; + foreach ($words as $word => $_) { + $length = strlen($word); + if ($length <= 31) { + $length_bitset |= 1 << $length; + } + $ord = ord($word); + $ord_bitset |= 1 << ($ord & 31); + } + + while ($offset < strlen($subject) && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) { + [$whitespaces, $next] = $matches[0]; + $word = substr($subject, $offset, $next - $offset); + + $shift = strlen($word); + $ord = ord($word); + if (($shift > 31 || ($length_bitset & (1 << $shift))) + && ($ord_bitset & (1 << ($ord & 31))) + && array_key_exists($word, $words)) { + $result .= substr($subject, $processed, $offset - $processed); + $result .= call_user_func($callback, $word, $words[$word]); + $processed = $offset + strlen($word); + } + $offset = $next + strlen($whitespaces); + } + $word = substr($subject, $offset); + if (array_key_exists($word, $words)) { + $result .= substr($subject, $processed, $offset - $processed); + $result .= call_user_func($callback, $word, $words[$word]); + } else { + $result .= substr($subject, $processed); + } + return $result; + } + /** * Copied from http://php.net/manual/en/function.str-replace.php#88569 * Modified for camel caps: renamed stro_replace -> strOrigReplace @@ -198,7 +294,13 @@ class Smilies */ private static function strOrigReplace(array $search, array $replace, string $subject): string { - return strtr($subject, array_combine($search, $replace)); + return self::performForEachWordMatch( + array_combine($search, $replace), + $subject, + function (string $_, string $value) { + return 
$value; + } + ); } /** @@ -227,6 +329,12 @@ class Smilies return $s; } + private static function noSmilies(): bool { + return (intval(DI::config()->get('system', 'no_smilies')) || + (DI::userSession()->getLocalUserId() && + intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'system', 'no_smilies')))); + } + /** * Replaces emoji shortcodes in a string from a structured array of searches and replaces. * @@ -240,9 +348,7 @@ class Smilies */ public static function replaceFromArray(string $text, array $smilies, bool $no_images = false): string { - if (intval(DI::config()->get('system', 'no_smilies')) - || (DI::userSession()->getLocalUserId() && intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'system', 'no_smilies'))) - ) { + if (self::noSmilies()) { return $text; } @@ -261,7 +367,7 @@ class Smilies $smilies = $cleaned; } - $text = preg_replace_callback('/<(3+)/', [self::class, 'heartReplaceCallback'], $text); + $text = preg_replace_callback('/\B<3+?\b/', [self::class, 'heartReplaceCallback'], $text); $text = self::strOrigReplace($smilies['texts'], $smilies['icons'], $text); $text = preg_replace_callback('/<(code)>(.*?)<\/code>/ism', [self::class, 'decode'], $text); @@ -302,16 +408,7 @@ class Smilies */ private static function heartReplaceCallback(array $matches): string { - if (strlen($matches[1]) == 1) { - return $matches[0]; - } - - $t = ''; - for ($cnt = 0; $cnt < strlen($matches[1]); $cnt ++) { - $t .= '❤'; - } - - return str_replace($matches[0], $t, $matches[0]); + return str_repeat('❤', strlen($matches[0]) - 4); } /** diff --git a/src/Factory/Api/Mastodon/Emoji.php b/src/Factory/Api/Mastodon/Emoji.php index 712bddb48d..0a2440426c 100644 --- a/src/Factory/Api/Mastodon/Emoji.php +++ b/src/Factory/Api/Mastodon/Emoji.php @@ -34,28 +34,18 @@ class Emoji extends BaseFactory /** * Creates an emoji collection from shortcode => image mappings. * - * Only emojis with shortcodes of the form of ':shortcode:' are passed in the collection. 
- * * @param array $smilies - * @param bool $extract_url * * @return Emojis */ - public function createCollectionFromArray(array $smilies, bool $extract_url = true): Emojis + public function createCollectionFromArray(array $smilies): Emojis { $prototype = null; $emojis = []; foreach ($smilies as $shortcode => $url) { - if (substr($shortcode, 0, 1) == ':' && substr($shortcode, -1) == ':') { - if ($extract_url) { - if (preg_match('/src="(.+?)"/', $url, $matches)) { - $url = $matches[1]; - } else { - continue; - } - } + if ($shortcode !== '' && $url !== '') { $shortcode = trim($shortcode, ':'); if ($prototype === null) { @@ -71,12 +61,20 @@ class Emoji extends BaseFactory } /** - * @param array $smilies + * @param array $smilies as is returned by Smilies::getList() * * @return Emojis */ public function createCollectionFromSmilies(array $smilies): Emojis { - return self::createCollectionFromArray(array_combine($smilies['texts'], $smilies['icons'])); + $emojis = []; + $icons = $smilies['icons']; + foreach ($smilies['texts'] as $i => $name) { + $url = $icons[$i]; + if (preg_match('/src="(.+?)"/', $url, $matches)) { + $emojis[$name] = $matches[1]; + } + } + return self::createCollectionFromArray($emojis); } } diff --git a/src/Factory/Api/Mastodon/Status.php b/src/Factory/Api/Mastodon/Status.php index fb73432f03..6d45b4d9fe 100644 --- a/src/Factory/Api/Mastodon/Status.php +++ b/src/Factory/Api/Mastodon/Status.php @@ -290,11 +290,18 @@ class Status extends BaseFactory $emojis = null; if (DI::baseUrl()->isLocalUrl($item['uri'])) { - $used_smilies = Smilies::extractUsedSmilies($item['body'] ?: $item['raw-body']); + $used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body']); + // $used_smilies contains normalized texts + if ($item['raw-body']) { + $item['raw-body'] = $used_smilies['']; + } elseif ($item['body']) { + $item['body'] = $used_smilies['']; + } + unset($used_smilies['']); $emojis = 
$this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true); } else { if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", $item['body'] ?: $item['raw-body'], $matches)) { - $emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]), false)->getArrayCopy(true); + $emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]))->getArrayCopy(true); } } diff --git a/src/Protocol/ActivityPub/Transmitter.php b/src/Protocol/ActivityPub/Transmitter.php index 130aa3ab08..56724e22d2 100644 --- a/src/Protocol/ActivityPub/Transmitter.php +++ b/src/Protocol/ActivityPub/Transmitter.php @@ -899,7 +899,7 @@ class Transmitter $tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]); if (empty($tags)) { Logger::debug('No receivers found', ['uri-id' => $uri_id]); - $post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]); + $post = Post::selectFirst(Item::DELIVER_FIELDLIST, ['uri-id' => $uri_id, 'origin' => true]); if (!empty($post)) { ActivityPub\Transmitter::storeReceiversForItem($post); $tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]); @@ -1512,10 +1512,14 @@ class Transmitter * * @param array $tags Tag array * @param string $text Text containing tags like :tag: + * @return string normalized text */ private static function addEmojiTags(array &$tags, string $text) { - foreach (Smilies::extractUsedSmilies($text, true) as $name => $url) { + $emojis = Smilies::extractUsedSmilies($text); + $normalized = $emojis['']; + unset($emojis['']); + foreach ($emojis as $name => $url) { $tags[] = [ 'type' => 'Emoji', 'name' => $name, @@ -1525,6 +1529,7 @@ class Transmitter ], ]; } + return $normalized; } /** @@ -1559,8 +1564,6 @@ class Transmitter } } - self::addEmojiTags($tags, $item['body']); - $announce = self::getAnnounceArray($item); // Mention the original author upon commented reshares if 
(!empty($announce['comment'])) { @@ -1808,10 +1811,11 @@ class Transmitter $item = Post\Media::addHTMLAttachmentToItem($item); $body = $item['body']; - + $emojis = []; if ($type == 'Note') { $body = $item['raw-body'] ?? self::removePictures($body); } + $body = self::addEmojiTags($emojis, $body); /** * @todo Improve the automated summary @@ -1893,7 +1897,7 @@ class Transmitter } $data['attachment'] = self::createAttachmentList($item); - $data['tag'] = self::createTagList($item, $data['quoteUrl'] ?? ''); + $data['tag'] = array_merge(self::createTagList($item, $data['quoteUrl'] ?? ''), $emojis); if (empty($data['location']) && (!empty($item['coord']) || !empty($item['location']))) { $data['location'] = self::createLocation($item); diff --git a/tests/datasets/api.fixture.php b/tests/datasets/api.fixture.php index f5b16f9c6e..876827d748 100644 --- a/tests/datasets/api.fixture.php +++ b/tests/datasets/api.fixture.php @@ -112,6 +112,11 @@ return [ 'uri' => 'http://localhost/profile/mutualcontact', 'guid' => '46', ], + [ + 'id' => 100, + 'uri' => 'https://friendica.local/posts/100', + 'guid' => '100', + ], ], 'contact' => [ [ @@ -363,6 +368,12 @@ return [ 'et sed beatae nihil ullam temporibus corporis ratione blanditiis', 'plink' => 'http://localhost/display/6', ], + [ + 'uri-id' => 100, + 'title' => 'item_title', + 'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[', + 'plink' => 'https://friendica.local/post/100', + ], ], 'post' => [ [ @@ -744,6 +755,28 @@ return [ 'deleted' => 0, 'wall' => 0, ], + // An emoji post + [ + 'id' => 14, + 'uri-id' => 100, + 'visible' => 1, + 'contact-id' => 44, + 'author-id' => 44, + 'owner-id' => 42, + 'causer-id' => 44, + 'uid' => 0, + 'vid' => 8, + 'unseen' => 0, + 'parent-uri-id' => 7, + 'thr-parent-id' => 7, + 'private' => Item::PUBLIC, + 'global' => true, + 'gravity' => Item::GRAVITY_PARENT, + 'network' => Protocol::DFRN, + 'origin' => 0, + 'deleted' => 0, + 'wall' => 0, + ], ], 'post-thread' => [ [ diff --git 
a/tests/src/Content/SmiliesTest.php b/tests/src/Content/SmiliesTest.php index 38eb743e85..67ba313fe6 100644 --- a/tests/src/Content/SmiliesTest.php +++ b/tests/src/Content/SmiliesTest.php @@ -143,4 +143,107 @@ class SmiliesTest extends FixtureTest { $this->assertEquals($expected, Smilies::isEmojiPost($body)); } + + + public function dataReplace(): array + { + return [ + 'simple-1' => [ + 'expected' => 'alt=":-p"', + 'body' => ':-p', + ], + 'simple-1' => [ + 'expected' => 'alt=":-p"', + 'body' => ' :-p ', + ], + 'word-boundary-1' => [ + 'expected' => ':-pppp', + 'body' => ':-pppp', + ], + 'word-boundary-2' => [ + 'expected' => '~friendicaca', + 'body' => '~friendicaca', + ], + 'symbol-boundary-1' => [ + 'expected' => '(:-p)', + 'body' => '(:-p)', + ], + 'hearts-1' => [ + 'expected' => '❤ (❤) ❤', + 'body' => '<3 (<3) <3', + ], + 'hearts-8' => [ + 'expected' => '(❤❤❤❤❤❤❤❤)', + 'body' => '(<33333333)', + ], + 'no-hearts-1' => [ + 'expected' => '(<30)', + 'body' => '(<30)', + ], + 'no-hearts-2' => [ + 'expected' => '(3<33)', + 'body' => '(3<33)', + ], + ]; + } + + /** + * @dataProvider dataReplace + * + * @param string $expected + * @param string $body + */ + public function testReplace(string $expected, string $body) + { + $result = Smilies::replace($body); + $this->assertStringContainsString($expected, $result); + } + + public function dataExtractUsedSmilies(): array + { + return [ + 'single-smiley' => [ + 'expected' => ['like'], + 'body' => ':like', + 'normalized' => ':like:', + ], + 'multiple-smilies' => [ + 'expected' => ['like', 'dislike'], + 'body' => ':like :dislike', + 'normalized' => ':like: :dislike:', + ], + 'nosmile' => [ + 'expected' => [], + 'body' => '[nosmile] :like :like', + 'normalized' => '[nosmile] :like :like' + ], + 'in-code' => [ + 'expected' => [], + 'body' => '[code]:like :like :like[/code]', + 'normalized' => '[code]:like :like :like[/code]' + ], + '~friendica' => [ + 'expected' => ['friendica'], + 'body' => '~friendica', + 'normalized' => 
':friendica:' + ], + ]; + } + + /** + * @dataProvider dataExtractUsedSmilies + * + * @param array $expected + * @param string $body + * @param string $normalized + */ + public function testExtractUsedSmilies(array $expected, string $body, string $normalized) + { + $extracted = Smilies::extractUsedSmilies($body); + $this->assertEquals($normalized, $extracted['']); + foreach ($expected as $shortcode) { + $this->assertArrayHasKey($shortcode, $extracted); + } + $this->assertEquals(count($expected), count($extracted) - 1); + } } diff --git a/tests/src/Factory/Api/Mastodon/EmojiTest.php b/tests/src/Factory/Api/Mastodon/EmojiTest.php new file mode 100644 index 0000000000..da67ea1639 --- /dev/null +++ b/tests/src/Factory/Api/Mastodon/EmojiTest.php @@ -0,0 +1,45 @@ +. + * + */ + +namespace Friendica\Test\src\Factory\Api\Mastodon; + +use Friendica\Content\Smilies; +use Friendica\DI; +use Friendica\Test\FixtureTest; + +class EmojiTest extends FixtureTest +{ + protected function setUp(): void + { + parent::setUp(); + + DI::config()->set('system', 'no_smilies', false); + } + + public function testBuiltInCollection() + { + $emoji = DI::mstdnEmoji(); + $collection = $emoji->createCollectionFromSmilies(Smilies::getList())->getArrayCopy(true); + foreach ($collection as $item) { + $this->assertTrue(preg_match('(/images/.*)', $item['url']) === 1, $item['url']); + } + } +} diff --git a/tests/src/Factory/Api/Mastodon/StatusTest.php b/tests/src/Factory/Api/Mastodon/StatusTest.php new file mode 100644 index 0000000000..7593d9a32b --- /dev/null +++ b/tests/src/Factory/Api/Mastodon/StatusTest.php @@ -0,0 +1,61 @@ +. 
+ * + */ + +namespace Friendica\Test\src\Factory\Api\Mastodon; + +use Friendica\Model\Post; +use Friendica\DI; +use Friendica\Test\FixtureTest; + +class StatusTest extends FixtureTest +{ + protected $status; + + protected function setUp(): void + { + parent::setUp(); + + DI::config()->set('system', 'no_smilies', false); + $this->status = DI::mstdnStatus(); + } + + public function testSimpleStatus() + { + $post = Post::selectFirst([], ['id' => 13]); + $this->assertNotNull($post); + $result = $this->status->createFromUriId($post['uri-id']); + $this->assertNotNull($result); + } + + public function testSimpleEmojiStatus() + { + $post = Post::selectFirst([], ['id' => 14]); + $this->assertNotNull($post); + $result = $this->status->createFromUriId($post['uri-id'])->toArray(); + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed:', $result['content']); + $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); + $this->assertEquals(count($emojis), count($result['emojis'])); + foreach ($result['emojis'] as $emoji) { + $this->assertTrue(array_key_exists($emoji['shortcode'], $emojis)); + $this->assertEquals(0, strpos($emoji['url'], 'http')); + } + } +} diff --git a/tests/src/Protocol/ActivityPub/TransmitterTest.php b/tests/src/Protocol/ActivityPub/TransmitterTest.php new file mode 100644 index 0000000000..c7a94bc598 --- /dev/null +++ b/tests/src/Protocol/ActivityPub/TransmitterTest.php @@ -0,0 +1,53 @@ +. 
+ * + */ + +namespace Friendica\Test\src\Protocol\ActivityPub; + +use Friendica\DI; +use Friendica\Model\Post; +use Friendica\Protocol\ActivityPub\Transmitter; +use Friendica\Test\FixtureTest; + +class TransmitterTest extends FixtureTest +{ + protected function setUp(): void + { + parent::setUp(); + + DI::config()->set('system', 'no_smilies', false); + } + + public function testEmojiPost() + { + $post = Post::selectFirst([], ['id' => 14]); + $this->assertNotNull($post); + $note = Transmitter::createNote($post); + $this->assertNotNull($note); + + $this->assertEquals(':like: :friendica: no :dislike :p: :embarrassed:', $note['content']); + $emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true); + $this->assertEquals(count($emojis), count($note['tag'])); + foreach ($note['tag'] as $emoji) { + $this->assertTrue(array_key_exists($emoji['name'], $emojis)); + $this->assertEquals('Emoji', $emoji['type']); + } + } +} -- 2.39.5