git.mxchange.org Git - friendica.git/blobdiff - src/Content/Text/BBCode.php
Merge pull request #4548 from MrPetovan/task/3878-move-bb2diaspora-to-src
[friendica.git] / src / Content / Text / BBCode.php
index d2df3cee2f7714b9797ebdc38e4e05feb4028f46..9c7dac26052fa7c302da98e89fd6e73d2684d88d 100644 (file)
@@ -1,33 +1,37 @@
 <?php
+
 /**
  * @file src/Content/Text/BBCode.php
  */
+
 namespace Friendica\Content\Text;
 
 use DOMDocument;
+use DomXPath;
 use Exception;
+use Friendica\BaseObject;
 use Friendica\Content\OEmbed;
 use Friendica\Content\Smilies;
-use Friendica\Content\Text\Plaintext;
 use Friendica\Core\Addon;
 use Friendica\Core\Cache;
 use Friendica\Core\Config;
 use Friendica\Core\L10n;
-use Friendica\Core\Protocol;
 use Friendica\Core\PConfig;
+use Friendica\Core\Protocol;
 use Friendica\Core\System;
 use Friendica\Model\Contact;
+use Friendica\Network\Probe;
 use Friendica\Object\Image;
 use Friendica\Util\Map;
 use Friendica\Util\Network;
 use Friendica\Util\ParseUrl;
+use League\HTMLToMarkdown\HtmlConverter;
 
-require_once "include/bbcode.php";
 require_once "include/event.php";
 require_once "include/html2plain.php";
 require_once "mod/proxy.php";
 
-class BBCode
+class BBCode extends BaseObject
 {
        /**
         * @brief Fetches attachment data that were generated the old way
@@ -173,7 +177,7 @@ class BBCode
                }
 
                if ($title != "") {
-                       $title = bbcode(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, false, true);
+                       $title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
                        $title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
                        $title = str_replace(["[", "]"], ["&#91;", "&#93;"], $title);
                        $data["title"] = $title;
@@ -408,7 +412,7 @@ class BBCode
                        }
                }
 
-               $html = bbcode($post["text"].$post["after"], false, false, $htmlmode);
+               $html = self::convert($post["text"].$post["after"], false, $htmlmode);
                $msg = html2plain($html, 0, true);
                $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
 
@@ -705,7 +709,7 @@ class BBCode
                                }
 
                                if ($data["description"] != "" && $data["description"] != $data["title"]) {
-                                       $return .= sprintf('<blockquote>%s</blockquote>', trim(bbcode($data["description"])));
+                                       $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($data["description"])));
                                }
 
                                if ($data["type"] == "link") {
@@ -1077,7 +1081,7 @@ class BBCode
                // We only call this so that a previously unknown contact can be added.
                // This is important for the function "Model\Contact::getDetailsByURL()".
                // This function then can fetch an entry from the contact table.
-               Contact::getIdForURL($profile, 0);
+               Contact::getIdForURL($profile, 0, true);
 
                $data = Contact::getDetailsByURL($profile);
 
@@ -1114,7 +1118,7 @@ class BBCode
                                $text = $preshare . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ": <br />" . $share[3];
                                break;
                        case 3: // Diaspora
-                               $headline .= '<b>' . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . $userid . ':</b><br />';
+                               $headline = '<b>' . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . $userid . ':</b><br />';
 
                                $text = trim($share[1]);
 
@@ -1134,7 +1138,7 @@ class BBCode
 
                                break;
                        case 4:
-                               $headline .= '<br /><b>' . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8');
+                               $headline = '<br /><b>' . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8');
                                $headline .= L10n::t('<a href="%1$s" target="_blank">%2$s</a> %3$s', $link, $userid, $posted);
                                $headline .= ":</b><br />";
 
@@ -1202,7 +1206,7 @@ class BBCode
                $text = Cache::get($match[1]);
 
                if (is_null($text)) {
-                       $a = get_app();
+                       $a = self::getApp();
 
                        $stamp1 = microtime(true);
 
@@ -1261,7 +1265,7 @@ class BBCode
                $text = Cache::get($match[1]);
 
                if (is_null($text)) {
-                       $a = get_app();
+                       $a = self::getApp();
 
                        $stamp1 = microtime(true);
 
@@ -1285,7 +1289,7 @@ class BBCode
 
                                $doc = new DOMDocument();
                                @$doc->loadHTML($body);
-                               $xpath = new DomXPath($doc);
+                               $xpath = new DOMXPath($doc);
                                $list = $xpath->query("//meta[@name]");
                                foreach ($list as $node) {
                                        $attr = [];
@@ -1341,15 +1345,14 @@ class BBCode
         * - 8: Used for WP backlink text setting
         *
         * @param string $text
-        * @param bool   $preserve_nl
         * @param bool   $try_oembed
         * @param int    $simple_html
         * @param bool   $for_plaintext
         * @return string
         */
-       public static function convert($text, $preserve_nl = false, $try_oembed = true, $simple_html = false, $for_plaintext = false)
+       public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false)
        {
-               $a = get_app();
+               $a = self::getApp();
 
                /*
                 * preg_match_callback function to replace potential Oembed tags with Oembed content
@@ -1472,10 +1475,6 @@ class BBCode
 
                $text = str_replace(["\r","\n"], ['<br />', '<br />'], $text);
 
-               if ($preserve_nl) {
-                       $text = str_replace(["\n", "\r"], ['', ''], $text);
-               }
-
                // Remove all hashtag addresses
                if ((!$try_oembed || $simple_html) && !in_array($simple_html, [3, 7])) {
                        $text = preg_replace("/([#@!])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $text);
@@ -1726,7 +1725,7 @@ class BBCode
                $endlessloop = 0;
                while ((strpos($text, "[/quote]")!== false)  && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) {
                        $text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism",
-                                                "<br /><strong class=".'"author"'.">" . $t_wrote . "</strong><blockquote>$2</blockquote>",
+                                                "<p><strong class=".'"author"'.">" . $t_wrote . "</strong></p><blockquote>$2</blockquote>",
                                                 $text);
                }
 
@@ -1983,4 +1982,148 @@ class BBCode
 
                return $abstract;
        }
+
+       /**
+        * @brief preg_replace_callback handler turning a "@[name](url)" mention into a Diaspora mention
+        *
+        * The address is taken from Contact::getDetailsByURL() and, when that
+        * result carries no 'addr', from Probe::uri(). If neither provides an
+        * 'addr', the matched text is handed back unchanged.
+        *
+        * @param array $match Regex groups; [0] is the whole match, [2] the name and [3] the URL
+        * @return string "@{name; addr}" on success, otherwise the original match
+        */
+       private static function bbCodeMention2DiasporaCallback($match)
+       {
+               $profile_url = $match[3];
+
+               $details = Contact::getDetailsByURL($profile_url);
+               if (empty($details['addr'])) {
+                       // No address from the contact lookup - probe the URL instead
+                       $details = Probe::uri($profile_url);
+               }
+
+               return empty($details['addr'])
+                       ? $match[0]
+                       : '@{' . $match[2] . '; ' . $details['addr'] . '}';
+       }
+
+       /**
+        * @brief Converts a BBCode text into Markdown
+        *
+        * This function converts a BBCode item body to be sent to Markdown-enabled
+        * systems like Diaspora and Libertree.
+        *
+        * Processing order:
+        * - (Diaspora only) strip the share information
+        * - reduce "#[url=...]tag[/url]" hashtags to "#tag", spaces become underscores
+        * - drop the size parameters from [img=WxH] tags
+        * - park multi-line [code] blocks behind "#codeblock-N#" placeholders
+        * - BBCode -> HTML through self::convert() without oEmbed, then HTML -> Markdown
+        * - (Diaspora only) rewrite "@[name](url)" mentions
+        * - put the parked code blocks back and call the 'bb2diaspora' addon hook
+        *
+        * @param string $text
+        * @param bool   $for_diaspora Diaspora requires more changes than Libertree
+        * @return string
+        */
+       public static function toMarkdown($text, $for_diaspora = true)
+       {
+               $a = self::getApp();
+
+               $original_text = $text;
+
+               // Since Diaspora is creating a summary for links, this function removes them before posting
+               if ($for_diaspora) {
+                       $text = self::removeShareInformation($text);
+               }
+
+               /**
+                * Transform #tags, strip off the [url] and replace spaces with underscore
+                */
+               $url_search_string = "^\[\]";
+               $text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i",
+                       function ($matches) {
+                               return '#' . str_replace(' ', '_', $matches[2]);
+                       },
+                       $text
+               );
+
+               // Converting images with size parameters to simple images. Markdown doesn't know it.
+               $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
+
+               // Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
+               $codeblocks = [];
+
+               $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
+                       function ($matches) use (&$codeblocks) {
+                               $return = $matches[0];
+                               // Single-line code stays in place and goes through the normal conversion
+                               if (strpos($matches[2], "\n") !== false) {
+                                       $return = '#codeblock-' . count($codeblocks) . '#';
+
+                                       $prefix = '````' . $matches[1] . PHP_EOL;
+                                       $codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
+                               }
+                               return $return;
+                       },
+                       $text
+               );
+
+               // Convert it to HTML - don't try oembed
+               if ($for_diaspora) {
+                       $text = self::convert($text, false, 3);
+
+                       // Add all tags that maybe were removed
+                       if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) {
+                               // $text is not modified inside the loop, so decode it only once
+                               $decoded_text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
+                               $tagline = "";
+                               foreach ($tags[2] as $tag) {
+                                       $tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
+                                       // Strict check: a tag found at offset 0 is present, not missing
+                                       if (strpos($decoded_text, '#' . $tag) === false) {
+                                               $tagline .= '#' . $tag . ' ';
+                                       }
+                               }
+                               $text = $text . " " . $tagline;
+                       }
+               } else {
+                       $text = self::convert($text, false, 4);
+               }
+
+               // mask some special HTML chars from conversation to markdown
+               $text = str_replace(['&lt;', '&gt;', '&amp;'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text);
+
+               // If a link is followed by a quote then there should be a newline before it
+               // Maybe we should make this newline at every time before a quote.
+               $text = str_replace(["</a><blockquote>"], ["</a><br><blockquote>"], $text);
+
+               $stamp1 = microtime(true);
+
+               // Now convert HTML to Markdown
+               $converter = new HtmlConverter();
+               $text = $converter->convert($text);
+
+               // unmask the special chars back to HTML
+               // (the search strings carry backslashes in front of the underscores)
+               $text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
+
+               $a->save_timestamp($stamp1, "parser");
+
+               // Libertree has a problem with escaped hashtags.
+               $text = str_replace(['\#'], ['#'], $text);
+
+               // Remove any leading or trailing whitespace, as this will mess up
+               // the Diaspora signature verification and cause the item to disappear
+               $text = trim($text);
+
+               if ($for_diaspora) {
+                       // __CLASS__ instead of the 'self' string: "self" callables are deprecated since PHP 8.2
+                       $text = preg_replace_callback(
+                               "/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism",
+                               [__CLASS__, 'bbCodeMention2DiasporaCallback'],
+                               $text
+                       );
+               }
+
+               // Restore code blocks
+               $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+                       function ($matches) use ($codeblocks) {
+                               // Same integer index for the check and the read; unknown placeholders are dropped
+                               $index = intval($matches[1]);
+                               return isset($codeblocks[$index]) ? $codeblocks[$index] : '';
+                       },
+                       $text
+               );
+
+               Addon::callHooks('bb2diaspora', $text);
+
+               return $text;
+       }
 }