Add new Content\BBCode::toPlaintext()

[friendica.git] / src / Content / Text / BBCode.php
diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php

index ebaef2b29d1accf43625b1e56c9cdcafc7fb61e1..96c8eceb70f97b0c7d07b6dd68296be31caf6b1b 100644 (file)
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@@ -76,10 +76,12 @@ class BBCode extends BaseObject
  
                                         $picturedata = Image::getInfoFromURL($matches[1]);
  
-                                       if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
-                                               $post["image"] = $matches[1];
-                                       } else {
-                                               $post["preview"] = $matches[1];
+                                       if ($picturedata) {
+                                               if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
+                                                       $post["image"] = $matches[1];
+                                               } else {
+                                                       $post["preview"] = $matches[1];
+                                               }
                                         }
                                 }
  
@@ -241,6 +243,9 @@ class BBCode extends BaseObject
                         $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
  
                         $URLSearchString = "^\[\]";
+
+                       $body = preg_replace("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body);
+
                         if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
                                 if ((count($pictures) == 1) && !$has_title) {
                                         // Checking, if the link goes to a picture
@@ -266,7 +271,7 @@ class BBCode extends BaseObject
                                                 $post["text"] = str_replace($pictures[0][0], "", $body);
                                         } else {
                                                 $imgdata = Image::getInfoFromURL($pictures[0][1]);
-                                               if (substr($imgdata["mime"], 0, 6) == "image/") {
+                                               if ($imgdata && substr($imgdata["mime"], 0, 6) == "image/") {
                                                         $post["type"] = "photo";
                                                         $post["image"] = $pictures[0][1];
                                                         $post["preview"] = $pictures[0][2];
@@ -338,159 +343,20 @@ class BBCode extends BaseObject
         }
  
         /**
-        * @brief Convert a message into plaintext for connectors to other networks
+        * @brief Converts a BBCode text into plaintext
          *
-        * @param array $b The message array that is about to be posted
-        * @param int $limit The maximum number of characters when posting to that network
-        * @param bool $includedlinks Has an attached link to be included into the message?
-        * @param int $htmlmode This triggers the behaviour of the bbcode conversion
-        * @param string $target_network Name of the network where the post should go to.
+        * @param bool $keep_urls Whether to keep URLs in the resulting plaintext
          *
-        * @return string The converted message
+        * @return string
          */
-       public static function toPlaintext($b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "")
+       public static function toPlaintext($text, $keep_urls = true)
         {
-               // Remove the hash tags
-               $URLSearchString = "^\[\]";
-               $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]);
-
-               // Add an URL element if the text contains a raw link
-               $body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);
-
-               // Remove the abstract
-               $body = self::stripAbstract($body);
-
-               // At first look at data that is attached via "type-..." stuff
-               // This will hopefully replaced with a dedicated bbcode later
-               //$post = self::getAttachedData($b["body"]);
-               $post = self::getAttachedData($body, $b);
-
-               if (($b["title"] != "") && ($post["text"] != "")) {
-                       $post["text"] = trim($b["title"]."\n\n".$post["text"]);
-               } elseif ($b["title"] != "") {
-                       $post["text"] = trim($b["title"]);
-               }
-
-               $abstract = "";
-
-               // Fetch the abstract from the given target network
-               if ($target_network != "") {
-                       $default_abstract = self::getAbstract($b["body"]);
-                       $abstract = self::getAbstract($b["body"], $target_network);
-
-                       // If we post to a network with no limit we only fetch
-                       // an abstract exactly for this network
-                       if (($limit == 0) && ($abstract == $default_abstract)) {
-                               $abstract = "";
-                       }
-               } else {// Try to guess the correct target network
-                       switch ($htmlmode) {
-                               case 8:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_TWITTER);
-                                       break;
-                               case 7:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_STATUSNET);
-                                       break;
-                               case 6:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_APPNET);
-                                       break;
-                               default: // We don't know the exact target.
-                                       // We fetch an abstract since there is a posting limit.
-                                       if ($limit > 0) {
-                                               $abstract = self::getAbstract($b["body"]);
-                                       }
-                       }
-               }
-
-               if ($abstract != "") {
-                       $post["text"] = $abstract;
-
-                       if ($post["type"] == "text") {
-                               $post["type"] = "link";
-                               $post["url"] = $b["plink"];
-                       }
-               }
-
-               $html = self::convert($post["text"].$post["after"], false, $htmlmode);
-               $msg = HTML::toPlaintext($html, 0, true);
-               $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
-
-               $link = "";
-               if ($includedlinks) {
-                       if ($post["type"] == "link") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "text") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "video") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "photo") {
-                               $link = $post["image"];
-                       }
-
-                       if (($msg == "") && isset($post["title"])) {
-                               $msg = trim($post["title"]);
-                       }
-
-                       if (($msg == "") && isset($post["description"])) {
-                               $msg = trim($post["description"]);
-                       }
-
-                       // If the link is already contained in the post, then it neeedn't to be added again
-                       // But: if the link is beyond the limit, then it has to be added.
-                       if (($link != "") && strstr($msg, $link)) {
-                               $pos = strpos($msg, $link);
-
-                               // Will the text be shortened in the link?
-                               // Or is the link the last item in the post?
-                               if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) {
-                                       $msg = trim(str_replace($link, "", $msg));
-                               } elseif (($limit == 0) || ($pos < $limit)) {
-                                       // The limit has to be increased since it will be shortened - but not now
-                                       // Only do it with Twitter (htmlmode = 8)
-                                       if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) {
-                                               $limit = $limit - 23 + strlen($link);
-                                       }
-
-                                       $link = "";
-
-                                       if ($post["type"] == "text") {
-                                               unset($post["url"]);
-                                       }
-                               }
-                       }
+               $naked_text = preg_replace('/\[(.+?)\]/','', $text);
+               if (!$keep_urls) {
+                       $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
                 }
  
-               if ($limit > 0) {
-                       // Reduce multiple spaces
-                       // When posted to a network with limited space, we try to gain space where possible
-                       while (strpos($msg, "  ") !== false) {
-                               $msg = str_replace("  ", " ", $msg);
-                       }
-
-                       // Twitter is using its own limiter, so we always assume that shortened links will have this length
-                       if (iconv_strlen($link, "UTF-8") > 0) {
-                               $limit = $limit - 23;
-                       }
-
-                       if (iconv_strlen($msg, "UTF-8") > $limit) {
-                               if (($post["type"] == "text") && isset($post["url"])) {
-                                       $post["url"] = $b["plink"];
-                               } elseif (!isset($post["url"])) {
-                                       $limit = $limit - 23;
-                                       $post["url"] = $b["plink"];
-                               // Which purpose has this line? It is now uncommented, but left as a reminder
-                               //} elseif (strpos($b["body"], "[share") !== false) {
-                               //      $post["url"] = $b["plink"];
-                               } elseif (PConfig::get($b["uid"], "system", "no_intelligent_shortening")) {
-                                       $post["url"] = $b["plink"];
-                               }
-                               $msg = Plaintext::shorten($msg, $limit);
-                       }
-               }
-
-               $post["text"] = trim($msg);
-
-               return($post);
+               return $naked_text;
         }
  
         public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false)
@@ -1290,13 +1156,17 @@ class BBCode extends BaseObject
  
         private static function textHighlightCallback($match)
         {
+               // Fallback in case the language doesn't exist
+               $return = '[code]' . $match[2] . '[/code]';
+
                 if (in_array(strtolower($match[1]),
                                 ['php', 'css', 'mysql', 'sql', 'abap', 'diff', 'html', 'perl', 'ruby',
-                               'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh'])
+                               'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh', 'bash'])
                 ) {
-                       return text_highlight($match[2], strtolower($match[1]));
+                       $return = text_highlight($match[2], strtolower($match[1]));
                 }
-               return $match[0];
+
+               return $return;
         }
  
         /**
@@ -1574,7 +1444,7 @@ class BBCode extends BaseObject
                 $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '<u>$1</u>', $text);
  
                 // Check for strike-through text
-               $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<strike>$1</strike>', $text);
+               $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<s>$1</s>', $text);
  
                 // Check for over-line text
                 $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '<span class="overline">$1</span>', $text);
@@ -1715,6 +1585,14 @@ class BBCode extends BaseObject
                 $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '<img src="$3" style="width: $1px;" >', $text);
                 $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '<img class="zrl" src="$3" style="width: $1px;" >', $text);
  
+               $text = preg_replace_callback("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism",
+                       function ($matches) {
+                               $matches[1] = proxy_url($matches[1]);
+                               $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT);
+                               return '<img src="' . $matches[1] . '" alt="' . $matches[2] . '">';
+                       },
+                       $text);
+
                 // Images
                 // [img]pathtoimage[/img]
                 $text = preg_replace_callback(
@@ -1846,10 +1724,12 @@ class BBCode extends BaseObject
                 $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text);
                 $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text);
  
-
+               /// @todo What is the meaning of these lines?
                 $text = preg_replace('/\[\&amp\;([#a-z0-9]+)\;\]/', '&$1;', $text);
                 $text = preg_replace('/\&\#039\;/', '\'', $text);
-               $text = preg_replace('/\&quot\;/', '"', $text);
+
+               // Currently deactivated, it made problems with " inside of alt texts.
+               //$text = preg_replace('/\&quot\;/', '"', $text);
  
                 // fix any escaped ampersands that may have been converted into links
                 $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism', '<$1$2=$3&$4>', $text);