X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FBBCode.php;h=6c441bac6fb152621a948c8a98da59cee72b4020;hb=a66e9b81bafdb94fcb5405818f721c3a0dc2a418;hp=13d4e1b055f66b71c1e369fa5e6151819aee47b8;hpb=c845415a99ebc348103815a7b2c55b15c75cdd24;p=friendica.git diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 13d4e1b055..6c441bac6f 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -1,34 +1,36 @@ = 500) && ($picturedata[0] >= $picturedata[1])) { - $post["image"] = $matches[1]; - } else { - $post["preview"] = $matches[1]; + if ($picturedata) { + if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) { + $post["image"] = $matches[1]; + } else { + $post["preview"] = $matches[1]; + } } } @@ -174,7 +178,7 @@ class BBCode } if ($title != "") { - $title = BBCode::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true); + $title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true); $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); $title = str_replace(["[", "]"], ["[", "]"], $title); $data["title"] = $title; @@ -239,6 +243,9 @@ class BBCode $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body); $URLSearchString = "^\[\]"; + + $body = preg_replace("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body); + if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) { if ((count($pictures) == 1) && !$has_title) { // Checking, if the link goes to a picture @@ -264,7 +271,7 @@ class BBCode $post["text"] = str_replace($pictures[0][0], "", $body); } else { $imgdata = Image::getInfoFromURL($pictures[0][1]); - if (substr($imgdata["mime"], 0, 6) == "image/") { + if ($imgdata && substr($imgdata["mime"], 0, 6) == "image/") { $post["type"] = "photo"; $post["image"] = $pictures[0][1]; $post["preview"] = $pictures[0][2]; @@ -336,159 +343,20 @@ class BBCode } /** - * @brief Convert a message into plaintext for connectors to other networks + * @brief Converts a BBCode text into plaintext * - * @param array $b The message array that is about to be posted - * @param int $limit The maximum number of characters when posting to that network - * @param bool $includedlinks Has an attached link to be included into the message? - * @param int $htmlmode This triggers the behaviour of the bbcode conversion - * @param string $target_network Name of the network where the post should go to. + * @param bool $keep_urls Whether to keep URLs in the resulting plaintext * - * @return string The converted message + * @return string */ - public static function toPlaintext($b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "") + public static function toPlaintext($text, $keep_urls = true) { - // Remove the hash tags - $URLSearchString = "^\[\]"; - $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]); - - // Add an URL element if the text contains a raw link - $body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body); - - // Remove the abstract - $body = self::stripAbstract($body); - - // At first look at data that is attached via "type-..." stuff - // This will hopefully replaced with a dedicated bbcode later - //$post = self::getAttachedData($b["body"]); - $post = self::getAttachedData($body, $b); - - if (($b["title"] != "") && ($post["text"] != "")) { - $post["text"] = trim($b["title"]."\n\n".$post["text"]); - } elseif ($b["title"] != "") { - $post["text"] = trim($b["title"]); - } - - $abstract = ""; - - // Fetch the abstract from the given target network - if ($target_network != "") { - $default_abstract = self::getAbstract($b["body"]); - $abstract = self::getAbstract($b["body"], $target_network); - - // If we post to a network with no limit we only fetch - // an abstract exactly for this network - if (($limit == 0) && ($abstract == $default_abstract)) { - $abstract = ""; - } - } else {// Try to guess the correct target network - switch ($htmlmode) { - case 8: - $abstract = self::getAbstract($b["body"], NETWORK_TWITTER); - break; - case 7: - $abstract = self::getAbstract($b["body"], NETWORK_STATUSNET); - break; - case 6: - $abstract = self::getAbstract($b["body"], NETWORK_APPNET); - break; - default: // We don't know the exact target. - // We fetch an abstract since there is a posting limit. - if ($limit > 0) { - $abstract = self::getAbstract($b["body"]); - } - } - } - - if ($abstract != "") { - $post["text"] = $abstract; - - if ($post["type"] == "text") { - $post["type"] = "link"; - $post["url"] = $b["plink"]; - } + $naked_text = preg_replace('/\[(.+?)\]/','', $text); + if (!$keep_urls) { + $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); } - $html = BBCode::convert($post["text"].$post["after"], false, $htmlmode); - $msg = html2plain($html, 0, true); - $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8')); - - $link = ""; - if ($includedlinks) { - if ($post["type"] == "link") { - $link = $post["url"]; - } elseif ($post["type"] == "text") { - $link = $post["url"]; - } elseif ($post["type"] == "video") { - $link = $post["url"]; - } elseif ($post["type"] == "photo") { - $link = $post["image"]; - } - - if (($msg == "") && isset($post["title"])) { - $msg = trim($post["title"]); - } - - if (($msg == "") && isset($post["description"])) { - $msg = trim($post["description"]); - } - - // If the link is already contained in the post, then it neeedn't to be added again - // But: if the link is beyond the limit, then it has to be added. - if (($link != "") && strstr($msg, $link)) { - $pos = strpos($msg, $link); - - // Will the text be shortened in the link? - // Or is the link the last item in the post? - if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) { - $msg = trim(str_replace($link, "", $msg)); - } elseif (($limit == 0) || ($pos < $limit)) { - // The limit has to be increased since it will be shortened - but not now - // Only do it with Twitter (htmlmode = 8) - if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) { - $limit = $limit - 23 + strlen($link); - } - - $link = ""; - - if ($post["type"] == "text") { - unset($post["url"]); - } - } - } - } - - if ($limit > 0) { - // Reduce multiple spaces - // When posted to a network with limited space, we try to gain space where possible - while (strpos($msg, " ") !== false) { - $msg = str_replace(" ", " ", $msg); - } - - // Twitter is using its own limiter, so we always assume that shortened links will have this length - if (iconv_strlen($link, "UTF-8") > 0) { - $limit = $limit - 23; - } - - if (iconv_strlen($msg, "UTF-8") > $limit) { - if (($post["type"] == "text") && isset($post["url"])) { - $post["url"] = $b["plink"]; - } elseif (!isset($post["url"])) { - $limit = $limit - 23; - $post["url"] = $b["plink"]; - // Which purpose has this line? It is now uncommented, but left as a reminder - //} elseif (strpos($b["body"], "[share") !== false) { - // $post["url"] = $b["plink"]; - } elseif (PConfig::get($b["uid"], "system", "no_intelligent_shortening")) { - $post["url"] = $b["plink"]; - } - $msg = Plaintext::shorten($msg, $limit); - } - } - - $post["text"] = trim($msg); - - return($post); + return $naked_text; } public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false) @@ -707,7 +575,7 @@ class BBCode if ($data["description"] != "" && $data["description"] != $data["title"]) { // Sanitize the HTML by converting it to BBCode - $bbcode = html2bbcode($data["description"]); + $bbcode = HTML::toBBCode($data["description"]); $return .= sprintf('
%s
', trim(self::convert($bbcode))); } if ($data["type"] == "link") { @@ -762,27 +630,6 @@ class BBCode return $text . "\n" . $data["after"]; } - private static function cleanCss($input) - { - $cleaned = ""; - - $input = strtolower($input); - - for ($i = 0; $i < strlen($input); $i++) { - $char = substr($input, $i, 1); - - if (($char >= "a") && ($char <= "z")) { - $cleaned .= $char; - } - - if (!(strpos(" #;:0123456789-_.%", $char) === false)) { - $cleaned .= $char; - } - } - - return $cleaned; - } - /** * Converts [url] BBCodes in a format that looks fine on Mastodon. (callback function) * @@ -1197,7 +1044,7 @@ class BBCode $text = Cache::get($match[1]); if (is_null($text)) { - $a = get_app(); + $a = self::getApp(); $stamp1 = microtime(true); @@ -1220,7 +1067,7 @@ class BBCode $doc = new DOMDocument(); @$doc->loadHTML($body); - $xpath = new DomXPath($doc); + $xpath = new DOMXPath($doc); $list = $xpath->query("//meta[@name]"); foreach ($list as $node) { $attr = []; @@ -1256,7 +1103,7 @@ class BBCode $text = Cache::get($match[1]); if (is_null($text)) { - $a = get_app(); + $a = self::getApp(); $stamp1 = microtime(true); @@ -1280,7 +1127,7 @@ class BBCode $doc = new DOMDocument(); @$doc->loadHTML($body); - $xpath = new DomXPath($doc); + $xpath = new DOMXPath($doc); $list = $xpath->query("//meta[@name]"); foreach ($list as $node) { $attr = []; @@ -1309,13 +1156,17 @@ class BBCode private static function textHighlightCallback($match) { + // Fallback in case the language doesn't exist + $return = '[code]' . $match[2] . '[/code]'; + if (in_array(strtolower($match[1]), ['php', 'css', 'mysql', 'sql', 'abap', 'diff', 'html', 'perl', 'ruby', - 'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh']) + 'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh', 'bash']) ) { - return text_highlight($match[2], strtolower($match[1])); + $return = text_highlight($match[2], strtolower($match[1])); } - return $match[0]; + + return $return; } /** @@ -1343,7 +1194,7 @@ class BBCode */ public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false) { - $a = get_app(); + $a = self::getApp(); /* * preg_match_callback function to replace potential Oembed tags with Oembed content @@ -1392,7 +1243,7 @@ class BBCode // After we're finished processing the bbcode we'll // replace all of the event code with a reformatted version. - $ev = bbtoevent($text); + $ev = Event::fromBBCode($text); // Replace any html brackets with HTML Entities to prevent executing HTML or script // Don't use strip_tags here because it breaks [url] search by replacing & with amp @@ -1593,7 +1444,7 @@ class BBCode $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); // Check for strike-through text - $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); + $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); // Check for over-line text $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); @@ -1616,7 +1467,7 @@ class BBCode $text = preg_replace_callback( "(\[style=(.*?)\](.*?)\[\/style\])ism", function ($match) { - return "" . $match[2] . ""; + return "" . $match[2] . ""; }, $text ); @@ -1625,7 +1476,7 @@ class BBCode $text = preg_replace_callback( "(\[class=(.*?)\](.*?)\[\/class\])ism", function ($match) { - return "" . $match[2] . ""; + return "" . $match[2] . ""; }, $text ); @@ -1734,6 +1585,14 @@ class BBCode $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + $text = preg_replace_callback("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", + function ($matches) { + $matches[1] = proxy_url($matches[1]); + $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT); + return '' . $matches[2] . ''; + }, + $text); + // Images // [img]pathtoimage[/img] $text = preg_replace_callback( @@ -1830,7 +1689,7 @@ class BBCode // start which is always required). Allow desc with a missing summary for compatibility. if ((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) { - $sub = format_event_html($ev, $simple_html); + $sub = Event::getHTML($ev, $simple_html); $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); @@ -1865,10 +1724,12 @@ class BBCode $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text); $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text); - + /// @todo What is the meaning of these lines? $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); $text = preg_replace('/\&\#039\;/', '\'', $text); - $text = preg_replace('/\"\;/', '"', $text); + + // Currently deactivated, it made problems with " inside of alt texts. + //$text = preg_replace('/\"\;/', '"', $text); // fix any escaped ampersands that may have been converted into links $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); @@ -1947,7 +1808,7 @@ class BBCode * @param string $addon The addon for which the abstract is meant for * @return string The abstract */ - private static function getAbstract($text, $addon = "") + public static function getAbstract($text, $addon = "") { $abstract = ""; $abstracts = []; @@ -1969,4 +1830,148 @@ class BBCode return $abstract; } + + /** + * @brief Callback function to replace a Friendica style mention in a mention for Diaspora + * + * @param array $match Matching values for the callback + * @return string Replaced mention + */ + private static function bbCodeMention2DiasporaCallback($match) + { + $contact = Contact::getDetailsByURL($match[3]); + + if (empty($contact['addr'])) { + $contact = Probe::uri($match[3]); + } + + if (empty($contact['addr'])) { + return $match[0]; + } + + $mention = '@{' . $match[2] . '; ' . $contact['addr'] . '}'; + return $mention; + } + + /** + * @brief Converts a BBCode text into Markdown + * + * This function converts a BBCode item body to be sent to Markdown-enabled + * systems like Diaspora and Libertree + * + * @param string $text + * @param bool $for_diaspora Diaspora requires more changes than Libertree + * @return string + */ + public static function toMarkdown($text, $for_diaspora = true) + { + $a = self::getApp(); + + $original_text = $text; + + // Since Diaspora is creating a summary for links, this function removes them before posting + if ($for_diaspora) { + $text = self::removeShareInformation($text); + } + + /** + * Transform #tags, strip off the [url] and replace spaces with underscore + */ + $url_search_string = "^\[\]"; + $text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i", + function ($matches) { + return '#' . str_replace(' ', '_', $matches[2]); + }, + $text + ); + + // Converting images with size parameters to simple images. Markdown doesn't know it. + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text); + + // Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert() + $codeblocks = []; + + $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is", + function ($matches) use (&$codeblocks) { + $return = $matches[0]; + if (strpos($matches[2], "\n") !== false) { + $return = '#codeblock-' . count($codeblocks) . '#'; + + $prefix = '````' . $matches[1] . PHP_EOL; + $codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````'; + } + return $return; + }, + $text + ); + + // Convert it to HTML - don't try oembed + if ($for_diaspora) { + $text = self::convert($text, false, 3); + + // Add all tags that maybe were removed + if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) { + $tagline = ""; + foreach ($tags[2] as $tag) { + $tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8'); + if (!strpos(html_entity_decode($text, ENT_QUOTES, 'UTF-8'), '#' . $tag)) { + $tagline .= '#' . $tag . ' '; + } + } + $text = $text . " " . $tagline; + } + } else { + $text = self::convert($text, false, 4); + } + + // mask some special HTML chars from conversation to markdown + $text = str_replace(['<', '>', '&'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text); + + // If a link is followed by a quote then there should be a newline before it + // Maybe we should make this newline at every time before a quote. + $text = str_replace(["
"], ["
"], $text); + + $stamp1 = microtime(true); + + // Now convert HTML to Markdown + $converter = new HtmlConverter(); + $text = $converter->convert($text); + + // unmask the special chars back to HTML + $text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['<', '>', '&'], $text); + + $a->save_timestamp($stamp1, "parser"); + + // Libertree has a problem with escaped hashtags. + $text = str_replace(['\#'], ['#'], $text); + + // Remove any leading or trailing whitespace, as this will mess up + // the Diaspora signature verification and cause the item to disappear + $text = trim($text); + + if ($for_diaspora) { + $url_search_string = "^\[\]"; + $text = preg_replace_callback( + "/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism", + ['self', 'bbCodeMention2DiasporaCallback'], + $text + ); + } + + // Restore code blocks + $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU', + function ($matches) use ($codeblocks) { + $return = ''; + if (isset($codeblocks[intval($matches[1])])) { + $return = $codeblocks[$matches[1]]; + } + return $return; + }, + $text + ); + + Addon::callHooks('bb2diaspora', $text); + + return $text; + } }