<?php
+
/**
* @file src/Content/Text/BBCode.php
*/
+
namespace Friendica\Content\Text;
use DOMDocument;
+use DOMXPath;
use Exception;
+use Friendica\BaseObject;
use Friendica\Content\OEmbed;
use Friendica\Content\Smilies;
-use Friendica\Content\Text\Plaintext;
use Friendica\Core\Addon;
use Friendica\Core\Cache;
use Friendica\Core\Config;
use Friendica\Core\L10n;
-use Friendica\Core\Network;
use Friendica\Core\PConfig;
+use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Model\Contact;
+use Friendica\Network\Probe;
use Friendica\Object\Image;
use Friendica\Util\Map;
-use Friendica\Util\Network as NetworkUtil;
+use Friendica\Util\Network;
use Friendica\Util\ParseUrl;
+use League\HTMLToMarkdown\HtmlConverter;
-require_once "include/bbcode.php";
-require_once "include/html2plain.php";
+require_once "include/event.php";
+require_once "mod/proxy.php";
-class BBCode
+class BBCode extends BaseObject
{
/**
* @brief Fetches attachment data that were generated the old way
}
if ($title != "") {
- $title = bbcode(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, false, true);
+ $title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
$title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
$title = str_replace(["[", "]"], ["[", "]"], $title);
$data["title"] = $title;
$body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);
// Remove the abstract
- $body = self::removeAbstract($body);
+ $body = self::stripAbstract($body);
// At first look at data that is attached via "type-..." stuff
// This will hopefully be replaced with a dedicated bbcode later
}
}
- $html = bbcode($post["text"].$post["after"], false, false, $htmlmode);
- $msg = html2plain($html, 0, true);
+ $html = self::convert($post["text"].$post["after"], false, $htmlmode);
+ $msg = HTML::toPlaintext($html, 0, true);
$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
$link = "";
} else {
$scaled = $mtch[1];
}
- $i = NetworkUtil::fetchUrl($scaled);
+ $i = Network::fetchUrl($scaled);
if (!$i) {
return $srctext;
}
$return = '';
if ($simplehtml == 7) {
- $return = self::convertUrlForMastodon($data["url"]);
+ $return = self::convertUrlForOStatus($data["url"]);
} elseif (($simplehtml != 4) && ($simplehtml != 0)) {
$return = sprintf('<a href="%s" target="_blank">%s</a><br>', $data["url"], $data["title"]);
} else {
}
if ($data["description"] != "" && $data["description"] != $data["title"]) {
- $return .= sprintf('<blockquote>%s</blockquote>', trim(bbcode($data["description"])));
+ // Sanitize the HTML by converting it to BBCode
+ $bbcode = HTML::toBBCode($data["description"]);
+ $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
}
-
if ($data["type"] == "link") {
$return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['url'], parse_url($data['url'], PHP_URL_HOST));
}
if (($data["url"] != "") && ($data["title"] != "")) {
$text .= "\n[url=" . $data["url"] . "]" . $data["title"] . "[/url]";
} elseif (($data["url"] != "")) {
- $text .= "\n" . $data["url"];
+ $text .= "\n[url]" . $data["url"] . "[/url]";
}
return $text . "\n" . $data["after"];
}
- private static function cleanCss($input)
- {
- $cleaned = "";
-
- $input = strtolower($input);
-
- for ($i = 0; $i < strlen($input); $i++) {
- $char = substr($input, $i, 1);
-
- if (($char >= "a") && ($char <= "z")) {
- $cleaned .= $char;
- }
-
- if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
- $cleaned .= $char;
- }
- }
-
- return $cleaned;
- }
-
/**
* Converts [url] BBCodes into a format that looks fine on Mastodon. (callback function)
*
* @param array $match Array with the matching values
* @return string reformatted link including HTML codes
*/
- private static function convertUrlForMastodonCallback($match)
+ private static function convertUrlForOStatusCallback($match)
{
$url = $match[1];
return $match[0];
}
- return self::convertUrlForMastodon($url);
+ return self::convertUrlForOStatus($url);
}
/**
- * @brief Converts [url] BBCodes in a format that looks fine on Mastodon and GNU Social.
+ * @brief Converts [url] BBCodes in a format that looks fine on OStatus systems.
* @param string $url URL that is about to be reformatted
* @return string reformatted link including HTML codes
*/
- private static function convertUrlForMastodon($url)
+ private static function convertUrlForOStatus($url)
{
// Build the visible link text by removing the "<scheme>://" part from the URL.
// NOTE(review): assumes parse_url() delivered a 'scheme' component - TODO confirm for scheme-less or malformed input
$parts = parse_url($url);
$scheme = $parts['scheme'] . '://';
$styled_url = str_replace($scheme, '', $url);
- $html = '<a href="%s" class="attachment" rel="nofollow noopener" target="_blank">' .
- '<span class="invisible">%s</span>';
-
// Link texts longer than 30 bytes are cut after 30 bytes and get an ellipsis appended.
// NOTE(review): strlen()/substr() count bytes, so a multibyte character at the cut position may be split - confirm
if (strlen($styled_url) > 30) {
- $html .= '<span class="ellipsis">%s</span>' .
- '<span class="invisible">%s</span></a>';
-
- $ellipsis = substr($styled_url, 0, 30);
- $rest = substr($styled_url, 30);
- return sprintf($html, $url, $scheme, $ellipsis, $rest);
- } else {
- $html .= '%s</a>';
- return sprintf($html, $url, $scheme, $styled_url);
+ $styled_url = substr($styled_url, 0, 30) . "…";
}
+
// The href always carries the complete, unshortened URL; only the link text is shortened.
+ $html = '<a href="%s" target="_blank">%s</a>';
+
+ return sprintf($html, $url, $styled_url);
}
/*
}
// We only call this so that a previously unknown contact can be added.
- // This is important for the function "get_contact_details_by_url".
+ // This is important for the function "Model\Contact::getDetailsByURL()".
// This function then can fetch an entry from the contact table.
- Contact::getIdForURL($profile, 0);
+ Contact::getIdForURL($profile, 0, true);
$data = Contact::getDetailsByURL($profile);
if (x($data, "name") && x($data, "addr")) {
$userid_compact = $data["name"] . " (" . $data["addr"] . ")";
} else {
- $userid_compact = Network::getAddrFromProfileUrl($profile, $author);
+ $userid_compact = Protocol::getAddrFromProfileUrl($profile, $author);
}
if (x($data, "addr")) {
$userid = $data["addr"];
} else {
- $userid = Network::formatMention($profile, $author);
+ $userid = Protocol::formatMention($profile, $author);
}
if (x($data, "name")) {
$text = $preshare . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ": <br />" . $share[3];
break;
case 3: // Diaspora
- $headline .= '<b>' . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . $userid . ':</b><br />';
+ $headline = '<b>' . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . $userid . ':</b><br />';
$text = trim($share[1]);
}
if (stripos(normalise_link($link), 'http://twitter.com/') === 0) {
+ $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
+ } else {
$text .= $headline . '<blockquote>' . trim($share[3]) . "</blockquote><br />";
if ($link != "") {
$text .= '<br /><a href="' . $link . '">[l]</a>';
}
- } else {
- $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
}
break;
case 4:
- $headline .= '<br /><b>' . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8');
+ $headline = '<br /><b>' . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8');
$headline .= L10n::t('<a href="%1$s" target="_blank">%2$s</a> %3$s', $link, $userid, $posted);
$headline .= ":</b><br />";
$text = Cache::get($match[1]);
if (is_null($text)) {
- $a = get_app();
+ $a = self::getApp();
$stamp1 = microtime(true);
$doc = new DOMDocument();
@$doc->loadHTML($body);
- $xpath = new DomXPath($doc);
+ $xpath = new DOMXPath($doc);
$list = $xpath->query("//meta[@name]");
foreach ($list as $node) {
$attr = [];
$text = Cache::get($match[1]);
if (is_null($text)) {
- $a = get_app();
+ $a = self::getApp();
$stamp1 = microtime(true);
$doc = new DOMDocument();
@$doc->loadHTML($body);
- $xpath = new DomXPath($doc);
+ $xpath = new DOMXPath($doc);
$list = $xpath->query("//meta[@name]");
foreach ($list as $node) {
$attr = [];
* - 8: Used for WP backlink text setting
*
* @param string $text
- * @param bool $preserve_nl
* @param bool $try_oembed
* @param int $simple_html
* @param bool $for_plaintext
* @return string
*/
- public static function convert($text, $preserve_nl = false, $try_oembed = true, $simple_html = false, $for_plaintext = false)
+ public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false)
{
- $a = get_app();
+ $a = self::getApp();
/*
* preg_match_callback function to replace potential Oembed tags with Oembed content
$text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::escapeNoparseCallback', $text);
// Remove the abstract element. It is a non visible element.
- $text = self::removeAbstract($text);
+ $text = self::stripAbstract($text);
// Move all spaces out of the tags
$text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $text);
$autolink_regex = "/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism";
$text = preg_replace($autolink_regex, '$1[url]$2[/url]', $text);
if ($simple_html == 7) {
- $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
- $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
+ $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
+ $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
}
} else {
$text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $text);
$text = str_replace(["\r","\n"], ['<br />', '<br />'], $text);
- if ($preserve_nl) {
- $text = str_replace(["\n", "\r"], ['', ''], $text);
- }
-
// Remove all hashtag addresses
if ((!$try_oembed || $simple_html) && !in_array($simple_html, [3, 7])) {
$text = preg_replace("/([#@!])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $text);
if (strpos($text, '[/map]') !== false) {
$text = preg_replace_callback(
"/\[map\](.*?)\[\/map\]/ism",
- function ($match) {
- // the extra space in the following line is intentional
- // Whyyy? - @MrPetovan
- return str_replace($match[0], '<div class="map" >' . Map::byLocation($match[1]) . '</div>', $match[0]);
+ function ($match) use ($simple_html) {
+ return str_replace($match[0], '<p class="map">' . Map::byLocation($match[1], $simple_html) . '</p>', $match[0]);
},
$text
);
if (strpos($text, '[map=') !== false) {
$text = preg_replace_callback(
"/\[map=(.*?)\]/ism",
- function ($match) {
- // the extra space in the following line is intentional
- // Whyyy? - @MrPetovan
- return str_replace($match[0], '<div class="map" >' . Map::byCoordinates(str_replace('/', ' ', $match[1])) . '</div>', $match[0]);
+ function ($match) use ($simple_html) {
+ return str_replace($match[0], '<p class="map">' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '</p>', $match[0]);
},
$text
);
}
if (strpos($text, '[map]') !== false) {
- $text = preg_replace("/\[map\]/", '<div class="map"></div>', $text);
+ $text = preg_replace("/\[map\]/", '<p class="map"></p>', $text);
}
// Check for headers
$text = preg_replace_callback(
"(\[style=(.*?)\](.*?)\[\/style\])ism",
function ($match) {
- return "<span style=\"" . self::cleanCss($match[1]) . ";\">" . $match[2] . "</span>";
+ return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
},
$text
);
$text = preg_replace_callback(
"(\[class=(.*?)\](.*?)\[\/class\])ism",
function ($match) {
- return "<span class=\"" . self::cleanCss($match[1]) . "\">" . $match[2] . "</span>";
+ return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
},
$text
);
$endlessloop = 0;
while ((strpos($text, "[/quote]")!== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) {
$text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism",
- "<br /><strong class=".'"author"'.">" . $t_wrote . "</strong><blockquote>$2</blockquote>",
+ "<p><strong class=".'"author"'.">" . $t_wrote . "</strong></p><blockquote>$2</blockquote>",
$text);
}
}
/**
- * @brief Removes the "abstract" element from the text
+ * @brief Strips the "abstract" tag from the provided text
*
* @param string $text The text with BBCode
* @return string The same text - but without "abstract" element
*/
- public static function removeAbstract($text)
+ public static function stripAbstract($text)
{
$text = preg_replace("/[\s|\n]*\[abstract\].*?\[\/abstract\][\s|\n]*/ism", '', $text);
$text = preg_replace("/[\s|\n]*\[abstract=.*?\].*?\[\/abstract][\s|\n]*/ism", '', $text);
return $abstract;
}
+
+	/**
+	 * @brief Callback that rewrites a Friendica style mention as a Diaspora style mention
+	 *
+	 * The address of the mentioned contact is looked up via Contact::getDetailsByURL()
+	 * and, when that lookup yields no address, via Probe::uri(). When neither
+	 * source provides an address the matched text is handed back untouched.
+	 *
+	 * @param array $match Matching values for the callback: [0] is the complete match,
+	 *                     [2] is used as display name and [3] as the URL to look up
+	 * @return string The mention in the form "@{name; address}" or the unchanged match
+	 */
+	private static function bbCodeMention2DiasporaCallback($match)
+	{
+		$profile_url = $match[3];
+
+		// First try the contact details, then fall back to probing the URL
+		$details = Contact::getDetailsByURL($profile_url);
+		if (empty($details['addr'])) {
+			$details = Probe::uri($profile_url);
+		}
+
+		if (!empty($details['addr'])) {
+			return '@{' . $match[2] . '; ' . $details['addr'] . '}';
+		}
+
+		// No address available: leave the original text as it is
+		return $match[0];
+	}
+
+	/**
+	 * @brief Converts a BBCode text into Markdown
+	 *
+	 * This function converts a BBCode item body to be sent to Markdown-enabled
+	 * systems like Diaspora and Libertree.
+	 *
+	 * Processing order:
+	 *  - hashtag links are reduced to plain "#tag" text,
+	 *  - multi-line [code] blocks are swapped for placeholders,
+	 *  - the BBCode is converted to HTML via self::convert(),
+	 *  - the HTML is converted to Markdown,
+	 *  - mentions are rewritten for Diaspora (only if $for_diaspora is set),
+	 *  - the code blocks are put back as fenced blocks,
+	 *  - the "bb2diaspora" addon hook is called with the result.
+	 *
+	 * @param string $text         The BBCode text
+	 * @param bool   $for_diaspora Diaspora requires more changes than Libertree
+	 * @return string The Markdown text, without leading or trailing whitespace
+	 */
+	public static function toMarkdown($text, $for_diaspora = true)
+	{
+		$a = self::getApp();
+
+		$original_text = $text;
+
+		// Since Diaspora is creating a summary for links, this function removes them before posting
+		if ($for_diaspora) {
+			$text = self::removeShareInformation($text);
+		}
+
+		// Transform #tags, strip off the [url] and replace spaces with underscore
+		$url_search_string = "^\[\]";
+		$text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i",
+			function ($matches) {
+				return '#' . str_replace(' ', '_', $matches[2]);
+			},
+			$text
+		);
+
+		// Converting images with size parameters to simple images. Markdown doesn't know it.
+		$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
+
+		// Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
+		$codeblocks = [];
+
+		$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
+			function ($matches) use (&$codeblocks) {
+				$return = $matches[0];
+				// Single-line code stays in place, only multi-line code is replaced by a placeholder
+				if (strpos($matches[2], "\n") !== false) {
+					$return = '#codeblock-' . count($codeblocks) . '#';
+
+					$prefix = '````' . $matches[1] . PHP_EOL;
+					$codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
+				}
+				return $return;
+			},
+			$text
+		);
+
+		// Convert it to HTML - don't try oembed
+		if ($for_diaspora) {
+			$text = self::convert($text, false, 3);
+
+			// Add all tags that maybe were removed
+			if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) {
+				// The converted text doesn't change inside the loop, so it is decoded only once
+				$decoded_text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
+
+				$tagline = "";
+				foreach ($tags[2] as $tag) {
+					$tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
+					// Strict comparison: a tag found at offset 0 is present, too
+					if (strpos($decoded_text, '#' . $tag) === false) {
+						$tagline .= '#' . $tag . ' ';
+					}
+				}
+				$text = $text . " " . $tagline;
+			}
+		} else {
+			$text = self::convert($text, false, 4);
+		}
+
+		// Mask the escaped special HTML chars so that they survive the conversion to Markdown.
+		// Only the entities are masked; the markup itself has to stay intact for the converter.
+		$text = str_replace(['&lt;', '&gt;', '&amp;'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text);
+
+		// If a link is followed by a quote then there should be a newline before it
+		// Maybe we should make this newline at every time before a quote.
+		$text = str_replace(["</a><blockquote>"], ["</a><br><blockquote>"], $text);
+
+		$stamp1 = microtime(true);
+
+		// Now convert HTML to Markdown
+		$converter = new HtmlConverter();
+		$text = $converter->convert($text);
+
+		// Unmask the special chars back to HTML entities (the search strings
+		// carry backslashes in front of the underscores of the masks)
+		$text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
+
+		$a->save_timestamp($stamp1, "parser");
+
+		// Libertree has a problem with escaped hashtags.
+		$text = str_replace(['\#'], ['#'], $text);
+
+		// Remove any leading or trailing whitespace, as this will mess up
+		// the Diaspora signature verification and cause the item to disappear
+		$text = trim($text);
+
+		if ($for_diaspora) {
+			// Rewrite Markdown mentions "@[name](url)" into the Diaspora mention format
+			$text = preg_replace_callback(
+				"/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism",
+				['self', 'bbCodeMention2DiasporaCallback'],
+				$text
+			);
+		}
+
+		// Restore code blocks
+		$text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+			function ($matches) use ($codeblocks) {
+				// Check and read with the same integer index
+				$index = intval($matches[1]);
+
+				return isset($codeblocks[$index]) ? $codeblocks[$index] : '';
+			},
+			$text
+		);
+
+		Addon::callHooks('bb2diaspora', $text);
+
+		return $text;
+	}
}