X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;ds=sidebyside;f=src%2FContent%2FText%2FBBCode.php;h=3bb9fda125b1e22b62ba0c75f3285c0ca37e8d86;hb=f23ecaff6af1982112469f90d6bcdf0408b0f22e;hp=ab7300da18b5c3e4f35b4bed9745374aee45bde5;hpb=09a612670ad79960dba46b58d87780513e8e4c3f;p=friendica.git diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index ab7300da18..3bb9fda125 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -230,18 +230,73 @@ class BBCode { DI::profiler()->startRecording('rendering'); // Remove pictures in advance to avoid unneeded proxy calls + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", ' ', $text); $text = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", ' $2 ', $text); $text = preg_replace("/\[img.*?\[\/img\]/ism", ' ', $text); // Remove attachment $text = self::replaceAttachment($text); - $naked_text = HTML::toPlaintext(self::convert($text, false, BBCode::EXTERNAL, true), 0, !$keep_urls); + $naked_text = HTML::toPlaintext(self::convert($text, false, self::EXTERNAL, true), 0, !$keep_urls); DI::profiler()->stopRecording(); return $naked_text; } + /** + * Converts text into a format that can be used for the channel search and the language detection. + * + * @param string $text + * @param integer $uri_id + * @return string + */ + public static function toSearchText(string $text, int $uri_id): string + { + // Removes attachments + $text = self::removeAttachment($text); + + // Add images because of possible alt texts + if (!empty($uri_id)) { + $text = Post\Media::addAttachmentsToBody($uri_id, $text, [Post\Media::IMAGE]); + } + + if (empty($text)) { + return ''; + } + + // Remove links without a link description + $text = preg_replace("~\[url\=.*\]https?:.*\[\/url\]~", ' ', $text); + + // Remove pictures + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", ' ', $text); + + // Replace picture with the alt description + $text = preg_replace("/\[img\=.*?\](.*?)\[\/img\]/ism", ' $1 ', $text); + + // Remove the other pictures + $text = preg_replace("/\[img.*?\[\/img\]/ism", ' ', $text); + + // Removes mentions, remove links from hashtags + $text = preg_replace('/[@!]\[url\=.*?\].*?\[\/url\]/ism', ' ', $text); + $text = preg_replace('/[#]\[url\=.*?\](.*?)\[\/url\]/ism', ' #$1 ', $text); + $text = preg_replace('/[@!#]?\[url.*?\[\/url\]/ism', ' ', $text); + $text = preg_replace("/\[url=[^\[\]]*\](.*)\[\/url\]/Usi", ' $1 ', $text); + + // Convert it to plain text + $text = self::toPlaintext($text, false); + + // Remove possibly remaining links + $text = preg_replace(Strings::autoLinkRegEx(), '', $text); + + // Remove all unneeded white space + do { + $oldtext = $text; + $text = str_replace([' ', "\n", "\r", '"'], ' ', $text); + } while ($oldtext != $text); + + return trim($text); + } + private static function proxyUrl(string $image, int $simplehtml = self::INTERNAL, int $uriid = 0, string $size = ''): string { // Only send proxied pictures to API and for internal display @@ -931,7 +986,7 @@ class BBCode $network = $contact['network'] ?? Protocol::PHANTOM; $tpl = Renderer::getMarkupTemplate('shared_content.tpl'); - $text .= BBCode::SHARED_ANCHOR . Renderer::replaceMacros($tpl, [ + $text .= self::SHARED_ANCHOR . Renderer::replaceMacros($tpl, [ '$profile' => $attributes['profile'], '$avatar' => $attributes['avatar'], '$author' => $attributes['author'], @@ -1112,6 +1167,7 @@ class BBCode public static function removeLinks(string $bbcode): string { DI::profiler()->startRecording('rendering'); + $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", ' ', $bbcode); $bbcode = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", ' $1 ', $bbcode); $bbcode = preg_replace("/\[img.*?\[\/img\]/ism", ' ', $bbcode); @@ -1178,7 +1234,7 @@ class BBCode } /** - * Expand Youtube and Vimeo links to + * Expand Youtube and Vimeo links to * * @param string $text * @return string @@ -1331,7 +1387,7 @@ class BBCode "\n[hr]", "[hr]\n", " [hr]", "[hr] ", "\n[attachment ", " [attachment ", "\n[/attachment]", "[/attachment]\n", " [/attachment]", "[/attachment] ", "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] ", - " \n", "\t\n", "[/li]\n", "\n[li]", "\n[*]", + " \n", "\t\n", "[/li]\n", "\n[li]", "\n[*]", ]; $replace = [ "[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", @@ -1424,14 +1480,14 @@ class BBCode if ($simple_html == self::INTERNAL) { //Ensure to always start with

if possible $heading_count = 0; - for ($level = 6; $level > 0; $level--) { + for ($level = 6; $level > 0; $level--) { if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { $heading_count++; } } if ($heading_count > 0) { $heading = min($heading_count + 3, 6); - for ($level = 6; $level > 0; $level--) { + for ($level = 6; $level > 0; $level--) { if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { $text = preg_replace("(\[h$level\](.*?)\[\/h$level\])ism", "

$1

", $text); $heading--; @@ -1492,7 +1548,11 @@ class BBCode $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '$2', $text); // Mastodon Emoji (internal tag, do not document for users) - $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); + if ($simple_html == self::MASTODON_API) { + $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); + } else { + $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); + } // Check for CSS classes // @deprecated since 2021.12, left for backward-compatibility reasons @@ -1735,12 +1795,8 @@ class BBCode $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); } - if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) { - $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); - // Emojis are always 4 byte Unicode characters - if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { - $text = '' . $text . ''; - } + if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA) && Smilies::isEmojiPost($text)) { + $text = '' . $text . ''; } // Handle mentions and hashtag links @@ -1996,7 +2052,7 @@ class BBCode { DI::profiler()->startRecording('rendering'); - $text = BBCode::performWithEscapedTags($text, ['code', 'noparse', 'nobb', 'pre'], function ($text) { + $text = self::performWithEscapedTags($text, ['code', 'noparse', 'nobb', 'pre'], function ($text) { $text = preg_replace("/[\s|\n]*\[abstract\].*?\[\/abstract\][\s|\n]*/ism", ' ', $text); $text = preg_replace("/[\s|\n]*\[abstract=.*?\].*?\[\/abstract][\s|\n]*/ism", ' ', $text); return $text; @@ -2018,7 +2074,7 @@ class BBCode DI::profiler()->startRecording('rendering'); $addon = strtolower($addon); - $abstract = BBCode::performWithEscapedTags($text, ['code', 'noparse', 'nobb', 'pre'], function ($text) use ($addon) { + $abstract = self::performWithEscapedTags($text, ['code', 'noparse', 'nobb', 'pre'], function ($text) use ($addon) { if ($addon && preg_match('#\[abstract=' . preg_quote($addon, '#') . '](.*?)\[/abstract]#ism', $text, $matches)) { return $matches[1]; } @@ -2115,6 +2171,9 @@ class BBCode // Maybe we should make this newline at every time before a quote. $text = str_replace(['

'], ['
'], $text); + // The converter doesn't convert these elements + $text = str_replace(['
', '
'], ['

', '

'], $text); + // Now convert HTML to Markdown $text = HTML::toMarkdown($text);