X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FBBCode.php;h=6c441bac6fb152621a948c8a98da59cee72b4020;hb=a66e9b81bafdb94fcb5405818f721c3a0dc2a418;hp=13d4e1b055f66b71c1e369fa5e6151819aee47b8;hpb=c845415a99ebc348103815a7b2c55b15c75cdd24;p=friendica.git

diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php
index 13d4e1b055..6c441bac6f 100644
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@@ -1,34 +1,36 @@
 <?php
+
 /**
  * @file src/Content/Text/BBCode.php
  */
+
 namespace Friendica\Content\Text;
 
 use DOMDocument;
-use DomXPath;
+use DOMXPath;
 use Exception;
+use Friendica\BaseObject;
 use Friendica\Content\OEmbed;
 use Friendica\Content\Smilies;
-use Friendica\Content\Text\Plaintext;
 use Friendica\Core\Addon;
 use Friendica\Core\Cache;
 use Friendica\Core\Config;
 use Friendica\Core\L10n;
-use Friendica\Core\Protocol;
 use Friendica\Core\PConfig;
+use Friendica\Core\Protocol;
 use Friendica\Core\System;
 use Friendica\Model\Contact;
+use Friendica\Model\Event;
+use Friendica\Network\Probe;
 use Friendica\Object\Image;
 use Friendica\Util\Map;
 use Friendica\Util\Network;
 use Friendica\Util\ParseUrl;
+use League\HTMLToMarkdown\HtmlConverter;
 
-require_once "include/event.php";
-require_once "include/html2plain.php";
-require_once "include/html2bbcode.php";
 require_once "mod/proxy.php";
 
-class BBCode
+class BBCode extends BaseObject
 {
 	/**
 	 * @brief Fetches attachment data that were generated the old way
@@ -74,10 +76,12 @@ class BBCode
 
 					$picturedata = Image::getInfoFromURL($matches[1]);
 
-					if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
-						$post["image"] = $matches[1];
-					} else {
-						$post["preview"] = $matches[1];
+					if ($picturedata) {
+						if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
+							$post["image"] = $matches[1];
+						} else {
+							$post["preview"] = $matches[1];
+						}
 					}
 				}
 
@@ -174,7 +178,7 @@ class BBCode
 		}
 
 		if ($title != "") {
-			$title = BBCode::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
+			$title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
 			$title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
 			$title = str_replace(["[", "]"], ["&#91;", "&#93;"], $title);
 			$data["title"] = $title;
@@ -239,6 +243,9 @@ class BBCode
 			$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
 
 			$URLSearchString = "^\[\]";
+
+			$body = preg_replace("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body);
+
 			if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
 				if ((count($pictures) == 1) && !$has_title) {
 					// Checking, if the link goes to a picture
@@ -264,7 +271,7 @@ class BBCode
 						$post["text"] = str_replace($pictures[0][0], "", $body);
 					} else {
 						$imgdata = Image::getInfoFromURL($pictures[0][1]);
-						if (substr($imgdata["mime"], 0, 6) == "image/") {
+						if ($imgdata && substr($imgdata["mime"], 0, 6) == "image/") {
 							$post["type"] = "photo";
 							$post["image"] = $pictures[0][1];
 							$post["preview"] = $pictures[0][2];
@@ -336,159 +343,20 @@ class BBCode
 	}
 
 	/**
-	 * @brief Convert a message into plaintext for connectors to other networks
+	 * @brief Converts a BBCode text into plaintext by removing everything enclosed in square brackets
 	 *
-	 * @param array $b The message array that is about to be posted
-	 * @param int $limit The maximum number of characters when posting to that network
-	 * @param bool $includedlinks Has an attached link to be included into the message?
-	 * @param int $htmlmode This triggers the behaviour of the bbcode conversion
-	 * @param string $target_network Name of the network where the post should go to.
+	 * @param bool $keep_urls Whether to keep URLs in the resulting plaintext
 	 *
-	 * @return string The converted message
+	 * @return string The text without bracketed tags (and without URLs if $keep_urls is false)
 	 */
-	public static function toPlaintext($b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "")
+	public static function toPlaintext($text, $keep_urls = true)
 	{
-		// Remove the hash tags
-		$URLSearchString = "^\[\]";
-		$body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]);
-
-		// Add an URL element if the text contains a raw link
-		$body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);
-
-		// Remove the abstract
-		$body = self::stripAbstract($body);
-
-		// At first look at data that is attached via "type-..." stuff
-		// This will hopefully replaced with a dedicated bbcode later
-		//$post = self::getAttachedData($b["body"]);
-		$post = self::getAttachedData($body, $b);
-
-		if (($b["title"] != "") && ($post["text"] != "")) {
-			$post["text"] = trim($b["title"]."\n\n".$post["text"]);
-		} elseif ($b["title"] != "") {
-			$post["text"] = trim($b["title"]);
-		}
-
-		$abstract = "";
-
-		// Fetch the abstract from the given target network
-		if ($target_network != "") {
-			$default_abstract = self::getAbstract($b["body"]);
-			$abstract = self::getAbstract($b["body"], $target_network);
-
-			// If we post to a network with no limit we only fetch
-			// an abstract exactly for this network
-			if (($limit == 0) && ($abstract == $default_abstract)) {
-				$abstract = "";
-			}
-		} else {// Try to guess the correct target network
-			switch ($htmlmode) {
-				case 8:
-					$abstract = self::getAbstract($b["body"], NETWORK_TWITTER);
-					break;
-				case 7:
-					$abstract = self::getAbstract($b["body"], NETWORK_STATUSNET);
-					break;
-				case 6:
-					$abstract = self::getAbstract($b["body"], NETWORK_APPNET);
-					break;
-				default: // We don't know the exact target.
-					// We fetch an abstract since there is a posting limit.
-					if ($limit > 0) {
-						$abstract = self::getAbstract($b["body"]);
-					}
-			}
-		}
-
-		if ($abstract != "") {
-			$post["text"] = $abstract;
-
-			if ($post["type"] == "text") {
-				$post["type"] = "link";
-				$post["url"] = $b["plink"];
-			}
+		$naked_text = preg_replace('/\[(.+?)\]/','', $text);
+		if (!$keep_urls) {
+			$naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
 		}
 
-		$html = BBCode::convert($post["text"].$post["after"], false, $htmlmode);
-		$msg = html2plain($html, 0, true);
-		$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
-
-		$link = "";
-		if ($includedlinks) {
-			if ($post["type"] == "link") {
-				$link = $post["url"];
-			} elseif ($post["type"] == "text") {
-				$link = $post["url"];
-			} elseif ($post["type"] == "video") {
-				$link = $post["url"];
-			} elseif ($post["type"] == "photo") {
-				$link = $post["image"];
-			}
-
-			if (($msg == "") && isset($post["title"])) {
-				$msg = trim($post["title"]);
-			}
-
-			if (($msg == "") && isset($post["description"])) {
-				$msg = trim($post["description"]);
-			}
-
-			// If the link is already contained in the post, then it neeedn't to be added again
-			// But: if the link is beyond the limit, then it has to be added.
-			if (($link != "") && strstr($msg, $link)) {
-				$pos = strpos($msg, $link);
-
-				// Will the text be shortened in the link?
-				// Or is the link the last item in the post?
-				if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) {
-					$msg = trim(str_replace($link, "", $msg));
-				} elseif (($limit == 0) || ($pos < $limit)) {
-					// The limit has to be increased since it will be shortened - but not now
-					// Only do it with Twitter (htmlmode = 8)
-					if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) {
-						$limit = $limit - 23 + strlen($link);
-					}
-
-					$link = "";
-
-					if ($post["type"] == "text") {
-						unset($post["url"]);
-					}
-				}
-			}
-		}
-
-		if ($limit > 0) {
-			// Reduce multiple spaces
-			// When posted to a network with limited space, we try to gain space where possible
-			while (strpos($msg, "  ") !== false) {
-				$msg = str_replace("  ", " ", $msg);
-			}
-
-			// Twitter is using its own limiter, so we always assume that shortened links will have this length
-			if (iconv_strlen($link, "UTF-8") > 0) {
-				$limit = $limit - 23;
-			}
-
-			if (iconv_strlen($msg, "UTF-8") > $limit) {
-				if (($post["type"] == "text") && isset($post["url"])) {
-					$post["url"] = $b["plink"];
-				} elseif (!isset($post["url"])) {
-					$limit = $limit - 23;
-					$post["url"] = $b["plink"];
-				// Which purpose has this line? It is now uncommented, but left as a reminder
-				//} elseif (strpos($b["body"], "[share") !== false) {
-				//	$post["url"] = $b["plink"];
-				} elseif (PConfig::get($b["uid"], "system", "no_intelligent_shortening")) {
-					$post["url"] = $b["plink"];
-				}
-				$msg = Plaintext::shorten($msg, $limit);
-			}
-		}
-
-		$post["text"] = trim($msg);
-
-		return($post);
+		return $naked_text;
 	}
 
 	public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false)
@@ -707,7 +575,7 @@ class BBCode
 
 				if ($data["description"] != "" && $data["description"] != $data["title"]) {
 					// Sanitize the HTML by converting it to BBCode
-					$bbcode = html2bbcode($data["description"]);
+					$bbcode = HTML::toBBCode($data["description"]);
 					$return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
 				}
 				if ($data["type"] == "link") {
@@ -762,27 +630,6 @@ class BBCode
 		return $text . "\n" . $data["after"];
 	}
 
-	private static function cleanCss($input)
-	{
-		$cleaned = "";
-
-		$input = strtolower($input);
-
-		for ($i = 0; $i < strlen($input); $i++) {
-			$char = substr($input, $i, 1);
-
-			if (($char >= "a") && ($char <= "z")) {
-				$cleaned .= $char;
-			}
-
-			if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
-				$cleaned .= $char;
-			}
-		}
-
-		return $cleaned;
-	}
-
 	/**
 	 * Converts [url] BBCodes in a format that looks fine on Mastodon. (callback function)
 	 *
@@ -1197,7 +1044,7 @@ class BBCode
 		$text = Cache::get($match[1]);
 
 		if (is_null($text)) {
-			$a = get_app();
+			$a = self::getApp();
 
 			$stamp1 = microtime(true);
 
@@ -1220,7 +1067,7 @@ class BBCode
 
 				$doc = new DOMDocument();
 				@$doc->loadHTML($body);
-				$xpath = new DomXPath($doc);
+				$xpath = new DOMXPath($doc);
 				$list = $xpath->query("//meta[@name]");
 				foreach ($list as $node) {
 					$attr = [];
@@ -1256,7 +1103,7 @@ class BBCode
 		$text = Cache::get($match[1]);
 
 		if (is_null($text)) {
-			$a = get_app();
+			$a = self::getApp();
 
 			$stamp1 = microtime(true);
 
@@ -1280,7 +1127,7 @@ class BBCode
 
 				$doc = new DOMDocument();
 				@$doc->loadHTML($body);
-				$xpath = new DomXPath($doc);
+				$xpath = new DOMXPath($doc);
 				$list = $xpath->query("//meta[@name]");
 				foreach ($list as $node) {
 					$attr = [];
@@ -1309,13 +1156,17 @@ class BBCode
 
 	private static function textHighlightCallback($match)
 	{
+		// Fallback in case the language isn't supported: output a plain [code] block
+		$return = '[code]' . $match[2] . '[/code]';
+
 		if (in_array(strtolower($match[1]),
 				['php', 'css', 'mysql', 'sql', 'abap', 'diff', 'html', 'perl', 'ruby',
-				'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh'])
+				'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh', 'bash'])
 		) {
-			return text_highlight($match[2], strtolower($match[1]));
+			$return = text_highlight($match[2], strtolower($match[1]));
 		}
-		return $match[0];
+
+		return $return;
 	}
 
 	/**
@@ -1343,7 +1194,7 @@ class BBCode
 	 */
 	public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false)
 	{
-		$a = get_app();
+		$a = self::getApp();
 
 		/*
 		 * preg_match_callback function to replace potential Oembed tags with Oembed content
@@ -1392,7 +1243,7 @@ class BBCode
 		// After we're finished processing the bbcode we'll
 		// replace all of the event code with a reformatted version.
 
-		$ev = bbtoevent($text);
+		$ev = Event::fromBBCode($text);
 
 		// Replace any html brackets with HTML Entities to prevent executing HTML or script
 		// Don't use strip_tags here because it breaks [url] search by replacing & with amp
@@ -1593,7 +1444,7 @@ class BBCode
 		$text = preg_replace("(\[u\](.*?)\[\/u\])ism", '<u>$1</u>', $text);
 
 		// Check for strike-through text
-		$text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<strike>$1</strike>', $text);
+		$text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<s>$1</s>', $text);
 
 		// Check for over-line text
 		$text = preg_replace("(\[o\](.*?)\[\/o\])ism", '<span class="overline">$1</span>', $text);
@@ -1616,7 +1467,7 @@ class BBCode
 		$text = preg_replace_callback(
 			"(\[style=(.*?)\](.*?)\[\/style\])ism",
 			function ($match) {
-				return "<span style=\"" . self::cleanCss($match[1]) . ";\">" . $match[2] . "</span>";
+				return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
 			},
 			$text
 		);
@@ -1625,7 +1476,7 @@ class BBCode
 		$text = preg_replace_callback(
 			"(\[class=(.*?)\](.*?)\[\/class\])ism",
 			function ($match) {
-				return "<span class=\"" . self::cleanCss($match[1]) . "\">" . $match[2] . "</span>";
+				return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
 			},
 			$text
 		);
@@ -1734,6 +1585,14 @@ class BBCode
 		$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '<img src="$3" style="width: $1px;" >', $text);
 		$text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '<img class="zrl" src="$3" style="width: $1px;" >', $text);
 
+		$text = preg_replace_callback("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism",
+			function ($matches) {
+				$matches[1] = proxy_url($matches[1]);
+				$matches[2] = htmlspecialchars($matches[2], ENT_COMPAT);
+				return '<img src="' . $matches[1] . '" alt="' . $matches[2] . '">';
+			},
+			$text);
+
 		// Images
 		// [img]pathtoimage[/img]
 		$text = preg_replace_callback(
@@ -1830,7 +1689,7 @@ class BBCode
 		// start which is always required). Allow desc with a missing summary for compatibility.
 
 		if ((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) {
-			$sub = format_event_html($ev, $simple_html);
+			$sub = Event::getHTML($ev, $simple_html);
 
 			$text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text);
 			$text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text);
@@ -1865,10 +1724,12 @@ class BBCode
 		$text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text);
 		$text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text);
 
-
+		/// @todo What is the meaning of these lines?
 		$text = preg_replace('/\[\&amp\;([#a-z0-9]+)\;\]/', '&$1;', $text);
 		$text = preg_replace('/\&\#039\;/', '\'', $text);
-		$text = preg_replace('/\&quot\;/', '"', $text);
+
+		// Currently deactivated because it caused problems with " inside of alt texts.
+		//$text = preg_replace('/\&quot\;/', '"', $text);
 
 		// fix any escaped ampersands that may have been converted into links
 		$text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism', '<$1$2=$3&$4>', $text);
@@ -1947,7 +1808,7 @@ class BBCode
 	 * @param string $addon The addon for which the abstract is meant for
 	 * @return string The abstract
 	 */
-	private static function getAbstract($text, $addon = "")
+	public static function getAbstract($text, $addon = "")
 	{
 		$abstract = "";
 		$abstracts = [];
@@ -1969,4 +1830,148 @@ class BBCode
 
 		return $abstract;
 	}
+
+	/**
+	 * @brief Callback that rewrites a Friendica style mention into the Diaspora mention format
+	 *
+	 * @param array $match Regex match: [0] is the whole mention, [2] the name and [3] the URL
+	 * @return string "@{name; addr}" or the unchanged match when no address could be found
+	 */
+	private static function bbCodeMention2DiasporaCallback($match)
+	{
+		$profile_url = $match[3];
+		// Look the contact up first, only probe the URL when that gave no address
+		$details = Contact::getDetailsByURL($profile_url);
+		if (empty($details['addr'])) {
+			$details = Probe::uri($profile_url);
+		}
+
+		// Still no address: leave the mention exactly as it was matched
+		if (empty($details['addr'])) {
+			return $match[0];
+		}
+		return '@{' . $match[2] . '; ' . $details['addr'] . '}';
+	}
+
+	/**
+	 * @brief Converts a BBCode text into Markdown
+	 *
+	 * This function converts a BBCode item body to be sent to Markdown-enabled
+	 * systems like Diaspora and Libertree
+	 *
+	 * @param string $text         BBCode text to convert
+	 * @param bool   $for_diaspora Diaspora requires more changes than Libertree
+	 * @return string The text converted to Markdown
+	 */
+	public static function toMarkdown($text, $for_diaspora = true)
+	{
+		$a = self::getApp();
+
+		$original_text = $text;
+
+		// Since Diaspora is creating a summary for links, this function removes them before posting
+		if ($for_diaspora) {
+			$text = self::removeShareInformation($text);
+		}
+
+		/**
+		 * Transform #tags, strip off the [url] and replace spaces with underscore
+		 */
+		$url_search_string = "^\[\]";
+		$text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i",
+			function ($matches) {
+				return '#' . str_replace(' ', '_', $matches[2]);
+			},
+			$text
+		);
+
+		// Converting images with size parameters to simple images. Markdown doesn't know it.
+		$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
+
+		// Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
+		$codeblocks = [];
+
+		$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
+			function ($matches) use (&$codeblocks) {
+				$return = $matches[0];
+				if (strpos($matches[2], "\n") !== false) {
+					$return = '#codeblock-' . count($codeblocks) . '#';
+
+					$prefix = '````' . $matches[1] . PHP_EOL;
+					$codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
+				}
+				return $return;
+			},
+			$text
+		);
+
+		// Convert it to HTML - don't try oembed
+		if ($for_diaspora) {
+			$text = self::convert($text, false, 3);
+
+			// Add all tags that maybe were removed; strict check, since a hit at offset 0 is falsy
+			if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) {
+				$tagline = "";
+				foreach ($tags[2] as $tag) {
+					$tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
+					if (strpos(html_entity_decode($text, ENT_QUOTES, 'UTF-8'), '#' . $tag) === false) {
+						$tagline .= '#' . $tag . ' ';
+					}
+				}
+				$text = $text . " " . $tagline;
+			}
+		} else {
+			$text = self::convert($text, false, 4);
+		}
+
+		// mask some special HTML chars from conversion to markdown
+		$text = str_replace(['&lt;', '&gt;', '&amp;'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text);
+
+		// If a link is followed by a quote then there should be a newline before it
+		// Maybe we should make this newline at every time before a quote.
+		$text = str_replace(["</a><blockquote>"], ["</a><br><blockquote>"], $text);
+
+		$stamp1 = microtime(true);
+
+		// Now convert HTML to Markdown
+		$converter = new HtmlConverter();
+		$text = $converter->convert($text);
+
+		// unmask the special chars back to HTML
+		$text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
+
+		$a->save_timestamp($stamp1, "parser");
+
+		// Libertree has a problem with escaped hashtags.
+		$text = str_replace(['\#'], ['#'], $text);
+
+		// Remove any leading or trailing whitespace, as this will mess up
+		// the Diaspora signature verification and cause the item to disappear
+		$text = trim($text);
+
+		if ($for_diaspora) {
+			// Rewrite Markdown mentions of the form "@[name](url)" via the mention callback
+			$text = preg_replace_callback(
+				"/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism",
+				['self', 'bbCodeMention2DiasporaCallback'],
+				$text
+			);
+		}
+
+		// Restore code blocks
+		$text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+			function ($matches) use ($codeblocks) {
+				$return = '';
+				if (isset($codeblocks[intval($matches[1])])) {
+					$return = $codeblocks[intval($matches[1])];
+				}
+				return $return;
+			},
+			$text
+		);
+
+		Addon::callHooks('bb2diaspora', $text);
+
+		return $text;
+	}
 }