]> git.mxchange.org Git - friendica.git/blobdiff - src/Content/Text/BBCode.php
Merge branch 'master' into develop
[friendica.git] / src / Content / Text / BBCode.php
index 71daa1d14559f7b2382f333aeefbff50550cbc7f..82c6f46980cbc8f5ab2f9e79e20626abb73172c4 100644 (file)
@@ -1,34 +1,36 @@
 <?php
+
 /**
  * @file src/Content/Text/BBCode.php
  */
+
 namespace Friendica\Content\Text;
 
 use DOMDocument;
-use DomXPath;
+use DOMXPath;
 use Exception;
+use Friendica\BaseObject;
 use Friendica\Content\OEmbed;
 use Friendica\Content\Smilies;
-use Friendica\Content\Text\Plaintext;
 use Friendica\Core\Addon;
 use Friendica\Core\Cache;
 use Friendica\Core\Config;
 use Friendica\Core\L10n;
-use Friendica\Core\Protocol;
 use Friendica\Core\PConfig;
+use Friendica\Core\Protocol;
 use Friendica\Core\System;
 use Friendica\Model\Contact;
+use Friendica\Network\Probe;
 use Friendica\Object\Image;
 use Friendica\Util\Map;
 use Friendica\Util\Network;
 use Friendica\Util\ParseUrl;
+use League\HTMLToMarkdown\HtmlConverter;
 
-require_once "include/bbcode.php";
 require_once "include/event.php";
-require_once "include/html2plain.php";
 require_once "mod/proxy.php";
 
-class BBCode
+class BBCode extends BaseObject
 {
        /**
         * @brief Fetches attachment data that were generated the old way
@@ -174,7 +176,7 @@ class BBCode
                }
 
                if ($title != "") {
-                       $title = bbcode(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, false, true);
+                       $title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
                        $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
                        $title = str_replace(["[", "]"], ["&#91;", "&#93;"], $title);
                        $data["title"] = $title;
@@ -409,8 +411,8 @@ class BBCode
                        }
                }
 
-               $html = bbcode($post["text"].$post["after"], false, false, $htmlmode);
-               $msg = html2plain($html, 0, true);
+               $html = self::convert($post["text"].$post["after"], false, $htmlmode);
+               $msg = HTML::toPlaintext($html, 0, true);
                $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
 
                $link = "";
@@ -678,7 +680,7 @@ class BBCode
 
                $return = '';
                if ($simplehtml == 7) {
-                       $return = self::convertUrlForMastodon($data["url"]);
+                       $return = self::convertUrlForOStatus($data["url"]);
                } elseif (($simplehtml != 4) && ($simplehtml != 0)) {
                        $return = sprintf('<a href="%s" target="_blank">%s</a><br>', $data["url"], $data["title"]);
                } else {
@@ -706,9 +708,10 @@ class BBCode
                                }
 
                                if ($data["description"] != "" && $data["description"] != $data["title"]) {
-                                       $return .= sprintf('<blockquote>%s</blockquote>', trim(bbcode($data["description"])));
+                                       // Sanitize the HTML by converting it to BBCode
+                                       $bbcode = HTML::toBBCode($data["description"]);
+                                       $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
                                }
-
                                if ($data["type"] == "link") {
                                        $return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['url'], parse_url($data['url'], PHP_URL_HOST));
                                }
@@ -755,33 +758,12 @@ class BBCode
                if (($data["url"] != "") && ($data["title"] != "")) {
                        $text .= "\n[url=" . $data["url"] . "]" . $data["title"] . "[/url]";
                } elseif (($data["url"] != "")) {
-                       $text .= "\n" . $data["url"];
+                       $text .= "\n[url]" . $data["url"] . "[/url]";
                }
 
                return $text . "\n" . $data["after"];
        }
 
-       private static function cleanCss($input)
-       {
-               $cleaned = "";
-
-               $input = strtolower($input);
-
-               for ($i = 0; $i < strlen($input); $i++) {
-                       $char = substr($input, $i, 1);
-
-                       if (($char >= "a") && ($char <= "z")) {
-                               $cleaned .= $char;
-                       }
-
-                       if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
-                               $cleaned .= $char;
-                       }
-               }
-
-               return $cleaned;
-       }
-
        /**
         * Converts [url] BBCodes in a format that looks fine on Mastodon. (callback function)
         *
@@ -789,7 +771,7 @@ class BBCode
         * @param array $match Array with the matching values
         * @return string reformatted link including HTML codes
         */
-       private static function convertUrlForMastodonCallback($match)
+       private static function convertUrlForOStatusCallback($match)
        {
                $url = $match[1];
 
@@ -802,34 +784,27 @@ class BBCode
                        return $match[0];
                }
 
-               return self::convertUrlForMastodon($url);
+               return self::convertUrlForOStatus($url);
        }
 
        /**
-        * @brief Converts [url] BBCodes in a format that looks fine on Mastodon and GNU Social.
+        * @brief Builds a plain HTML link for OStatus systems; the label is the URL without its scheme, cut after 30 bytes.
         * @param string $url URL that is about to be reformatted
         * @return string reformatted link including HTML codes
         */
-       private static function convertUrlForMastodon($url)
+       private static function convertUrlForOStatus($url)
        {
                $parts = parse_url($url);
                $scheme = $parts['scheme'] . '://';
                $styled_url = str_replace($scheme, '', $url);
 
-               $html = '<a href="%s" class="attachment" rel="nofollow noopener" target="_blank">' .
-                       '<span class="invisible">%s</span>';
-
                if (strlen($styled_url) > 30) {
-                       $html .= '<span class="ellipsis">%s</span>' .
-                               '<span class="invisible">%s</span></a>';
-
-                       $ellipsis = substr($styled_url, 0, 30);
-                       $rest = substr($styled_url, 30);
-                       return sprintf($html, $url, $scheme, $ellipsis, $rest);
-               } else {
-                       $html .= '%s</a>';
-                       return sprintf($html, $url, $scheme, $styled_url);
+                       $styled_url = substr($styled_url, 0, 30) . "…";
                }
+               // NOTE(review): substr() above counts bytes, so a multibyte character at the limit could be split - confirm
+               $html = '<a href="%s" target="_blank">%s</a>';
+
+               return sprintf($html, $url, $styled_url);
        }
 
        /*
@@ -1078,7 +1053,7 @@ class BBCode
                // We only call this so that a previously unknown contact can be added.
                // This is important for the function "Model\Contact::getDetailsByURL()".
                // This function then can fetch an entry from the contact table.
-               Contact::getIdForURL($profile, 0);
+               Contact::getIdForURL($profile, 0, true);
 
                $data = Contact::getDetailsByURL($profile);
 
@@ -1124,13 +1099,13 @@ class BBCode
                                }
 
                                if (stripos(normalise_link($link), 'http://twitter.com/') === 0) {
+                                       $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
+                               } else {
                                        $text .= $headline . '<blockquote>' . trim($share[3]) . "</blockquote><br />";
 
                                        if ($link != "") {
                                                $text .= '<br /><a href="' . $link . '">[l]</a>';
                                        }
-                               } else {
-                                       $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
                                }
 
                                break;
@@ -1203,7 +1178,7 @@ class BBCode
                $text = Cache::get($match[1]);
 
                if (is_null($text)) {
-                       $a = get_app();
+                       $a = self::getApp();
 
                        $stamp1 = microtime(true);
 
@@ -1226,7 +1201,7 @@ class BBCode
 
                                $doc = new DOMDocument();
                                @$doc->loadHTML($body);
-                               $xpath = new DomXPath($doc);
+                               $xpath = new DOMXPath($doc);
                                $list = $xpath->query("//meta[@name]");
                                foreach ($list as $node) {
                                        $attr = [];
@@ -1262,7 +1237,7 @@ class BBCode
                $text = Cache::get($match[1]);
 
                if (is_null($text)) {
-                       $a = get_app();
+                       $a = self::getApp();
 
                        $stamp1 = microtime(true);
 
@@ -1286,7 +1261,7 @@ class BBCode
 
                                $doc = new DOMDocument();
                                @$doc->loadHTML($body);
-                               $xpath = new DomXPath($doc);
+                               $xpath = new DOMXPath($doc);
                                $list = $xpath->query("//meta[@name]");
                                foreach ($list as $node) {
                                        $attr = [];
@@ -1342,15 +1317,14 @@ class BBCode
         * - 8: Used for WP backlink text setting
         *
         * @param string $text
-        * @param bool   $preserve_nl
         * @param bool   $try_oembed
         * @param int    $simple_html
         * @param bool   $for_plaintext
         * @return string
         */
-       public static function convert($text, $preserve_nl = false, $try_oembed = true, $simple_html = false, $for_plaintext = false)
+       public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false)
        {
-               $a = get_app();
+               $a = self::getApp();
 
                /*
                 * preg_match_callback function to replace potential Oembed tags with Oembed content
@@ -1459,8 +1433,8 @@ class BBCode
                        $autolink_regex = "/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism";
                        $text = preg_replace($autolink_regex, '$1[url]$2[/url]', $text);
                        if ($simple_html == 7) {
-                               $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
-                               $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
+                               $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
+                               $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
                        }
                } else {
                        $text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $text);
@@ -1473,10 +1447,6 @@ class BBCode
 
                $text = str_replace(["\r","\n"], ['<br />', '<br />'], $text);
 
-               if ($preserve_nl) {
-                       $text = str_replace(["\n", "\r"], ['', ''], $text);
-               }
-
                // Remove all hashtag addresses
                if ((!$try_oembed || $simple_html) && !in_array($simple_html, [3, 7])) {
                        $text = preg_replace("/([#@!])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $text);
@@ -1564,10 +1534,8 @@ class BBCode
                if (strpos($text, '[/map]') !== false) {
                        $text = preg_replace_callback(
                                "/\[map\](.*?)\[\/map\]/ism",
-                               function ($match) {
-                                       // the extra space in the following line is intentional
-                                       // Whyyy? - @MrPetovan
-                                       return str_replace($match[0], '<div class="map"  >' . Map::byLocation($match[1]) . '</div>', $match[0]);
+                               function ($match) use ($simple_html) {
+                                       return str_replace($match[0], '<p class="map">' . Map::byLocation($match[1], $simple_html) . '</p>', $match[0]);
                                },
                                $text
                        );
@@ -1575,16 +1543,14 @@ class BBCode
                if (strpos($text, '[map=') !== false) {
                        $text = preg_replace_callback(
                                "/\[map=(.*?)\]/ism",
-                               function ($match) {
-                                       // the extra space in the following line is intentional
-                                       // Whyyy? - @MrPetovan
-                                       return str_replace($match[0], '<div class="map"  >' . Map::byCoordinates(str_replace('/', ' ', $match[1])) . '</div>', $match[0]);
+                               function ($match) use ($simple_html) {
+                                       return str_replace($match[0], '<p class="map">' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '</p>', $match[0]);
                                },
                                $text
                        );
                }
                if (strpos($text, '[map]') !== false) {
-                       $text = preg_replace("/\[map\]/", '<div class="map"></div>', $text);
+                       $text = preg_replace("/\[map\]/", '<p class="map"></p>', $text);
                }
 
                // Check for headers
@@ -1631,7 +1597,7 @@ class BBCode
                $text = preg_replace_callback(
                        "(\[style=(.*?)\](.*?)\[\/style\])ism",
                        function ($match) {
-                               return "<span style=\"" . self::cleanCss($match[1]) . ";\">" . $match[2] . "</span>";
+                               return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
                        },
                        $text
                );
@@ -1640,7 +1606,7 @@ class BBCode
                $text = preg_replace_callback(
                        "(\[class=(.*?)\](.*?)\[\/class\])ism",
                        function ($match) {
-                               return "<span class=\"" . self::cleanCss($match[1]) . "\">" . $match[2] . "</span>";
+                               return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
                        },
                        $text
                );
@@ -1984,4 +1950,148 @@ class BBCode
 
                return $abstract;
        }
+
+       /**
+        * @brief Callback that rewrites a Friendica style mention into a Diaspora style mention
+        *
+        * @param array $match Regex match: [0] whole mention, [2] mention text, [3] mention URL
+        * @return string '@{text; address}', or the unchanged mention if no address could be found
+        */
+       private static function bbCodeMention2DiasporaCallback($match)
+       {
+               $profile_url = $match[3];
+
+               // Ask the contact data first and probe the URL only when no address was found
+               $details = Contact::getDetailsByURL($profile_url);
+               if (empty($details['addr'])) {
+                       $details = Probe::uri($profile_url);
+               }
+
+               if (empty($details['addr'])) {
+                       return $match[0];
+               }
+               return '@{' . $match[2] . '; ' . $details['addr'] . '}';
+       }
+
+       /**
+        * @brief Converts a BBCode text into Markdown
+        *
+        * Converts a BBCode item body for Markdown-enabled systems like Diaspora and Libertree:
+        * the BBCode is rendered to HTML by self::convert() and the HTML is then converted to Markdown.
+        *
+        * @param string $text         BBCode text to convert
+        * @param bool   $for_diaspora Diaspora requires more changes than Libertree
+        * @return string Markdown text, trimmed, after the 'bb2diaspora' addon hook has been called
+        */
+       public static function toMarkdown($text, $for_diaspora = true)
+       {
+               $a = self::getApp();
+
+               $original_text = $text;
+
+               // Since Diaspora is creating a summary for links, this function removes them before posting
+               if ($for_diaspora) {
+                       $text = self::removeShareInformation($text);
+               }
+
+               /**
+                * Transform #tags, strip off the [url] and replace spaces with underscore
+                */
+               $url_search_string = "^\[\]";
+               $text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i",
+                       function ($matches) {
+                               return '#' . str_replace(' ', '_', $matches[2]);
+                       },
+                       $text
+               );
+
+               // Converting images with size parameters to simple images. Markdown doesn't know it.
+               $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
+
+               // Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
+               $codeblocks = [];
+
+               $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
+                       function ($matches) use (&$codeblocks) {
+                               $return = $matches[0];
+                               if (strpos($matches[2], "\n") !== false) {
+                                       $return = '#codeblock-' . count($codeblocks) . '#';
+
+                                       $prefix = '````' . $matches[1] . PHP_EOL;
+                                       $codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
+                               }
+                               return $return;
+                       },
+                       $text
+               );
+
+               // Convert it to HTML - don't try oembed
+               if ($for_diaspora) {
+                       $text = self::convert($text, false, 3);
+
+                       // Re-add all hashtags of the original text that the conversion removed
+                       if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) {
+                               $tagline = "";
+                               foreach ($tags[2] as $tag) {
+                                       $tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
+                                       // strpos() returns 0 for a hit at the very start, so compare strictly with false
+                                       if (strpos(html_entity_decode($text, ENT_QUOTES, 'UTF-8'), '#' . $tag) === false) {
+                                               $tagline .= '#' . $tag . ' ';
+                                       }
+                               }
+                               $text = $text . " " . $tagline;
+                       }
+               } else {
+                       $text = self::convert($text, false, 4);
+               }
+
+               // mask some special HTML chars from conversion to markdown
+               $text = str_replace(['&lt;', '&gt;', '&amp;'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text);
+
+               // If a link is followed by a quote then there should be a newline before it
+               // Maybe we should make this newline at every time before a quote.
+               $text = str_replace(["</a><blockquote>"], ["</a><br><blockquote>"], $text);
+
+               $stamp1 = microtime(true);
+
+               // Now convert HTML to Markdown
+               $converter = new HtmlConverter();
+               $text = $converter->convert($text);
+
+               // unmask the special chars back to HTML
+               $text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
+
+               $a->save_timestamp($stamp1, "parser");
+
+               // Libertree has a problem with escaped hashtags.
+               $text = str_replace(['\#'], ['#'], $text);
+
+               // Remove any leading or trailing whitespace, as this will mess up
+               // the Diaspora signature verification and cause the item to disappear
+               $text = trim($text);
+
+               if ($for_diaspora) {
+                       $text = preg_replace_callback(
+                               "/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism",
+                               ['self', 'bbCodeMention2DiasporaCallback'],
+                               $text
+                       );
+               }
+
+               // Restore code blocks
+               $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+                       function ($matches) use ($codeblocks) {
+                               $return = '';
+                               if (isset($codeblocks[intval($matches[1])])) {
+                                       $return = $codeblocks[$matches[1]];
+                               }
+                               return $return;
+                       },
+                       $text
+               );
+
+               Addon::callHooks('bb2diaspora', $text);
+
+               return $text;
+       }
 }