<?php
+
/**
* @file src/Content/Text/BBCode.php
*/
+
namespace Friendica\Content\Text;
use DOMDocument;
-use DomXPath;
+use DOMXPath;
use Exception;
+use Friendica\BaseObject;
use Friendica\Content\OEmbed;
use Friendica\Content\Smilies;
-use Friendica\Content\Text\Plaintext;
use Friendica\Core\Addon;
use Friendica\Core\Cache;
use Friendica\Core\Config;
use Friendica\Core\L10n;
-use Friendica\Core\Protocol;
use Friendica\Core\PConfig;
+use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Model\Contact;
+use Friendica\Network\Probe;
use Friendica\Object\Image;
use Friendica\Util\Map;
use Friendica\Util\Network;
use Friendica\Util\ParseUrl;
+use League\HTMLToMarkdown\HtmlConverter;
require_once "include/event.php";
-require_once "include/html2plain.php";
-require_once "include/html2bbcode.php";
require_once "mod/proxy.php";
-class BBCode
+class BBCode extends BaseObject
{
/**
* @brief Fetches attachment data that were generated the old way
}
if ($title != "") {
- $title = BBCode::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
+ $title = self::convert(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, true);
$title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
$title = str_replace(["[", "]"], ["[", "]"], $title);
$data["title"] = $title;
}
}
- $html = BBCode::convert($post["text"].$post["after"], false, $htmlmode);
- $msg = html2plain($html, 0, true);
+ $html = self::convert($post["text"].$post["after"], false, $htmlmode);
+ $msg = HTML::toPlaintext($html, 0, true);
$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
$link = "";
$return = '';
if ($simplehtml == 7) {
- $return = self::convertUrlForMastodon($data["url"]);
+ $return = self::convertUrlForOStatus($data["url"]);
} elseif (($simplehtml != 4) && ($simplehtml != 0)) {
$return = sprintf('<a href="%s" target="_blank">%s</a><br>', $data["url"], $data["title"]);
} else {
if ($data["description"] != "" && $data["description"] != $data["title"]) {
// Sanitize the HTML by converting it to BBCode
- $bbcode = html2bbcode($data["description"]);
+ $bbcode = HTML::toBBCode($data["description"]);
$return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
}
if ($data["type"] == "link") {
return $text . "\n" . $data["after"];
}
- private static function cleanCss($input)
- {
- $cleaned = "";
-
- $input = strtolower($input);
-
- for ($i = 0; $i < strlen($input); $i++) {
- $char = substr($input, $i, 1);
-
- if (($char >= "a") && ($char <= "z")) {
- $cleaned .= $char;
- }
-
- if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
- $cleaned .= $char;
- }
- }
-
- return $cleaned;
- }
-
/**
* Converts [url] BBCodes in a format that looks fine on Mastodon. (callback function)
*
* @param array $match Array with the matching values
* @return string reformatted link including HTML codes
*/
- private static function convertUrlForMastodonCallback($match)
+ private static function convertUrlForOStatusCallback($match)
{
$url = $match[1];
return $match[0];
}
- return self::convertUrlForMastodon($url);
+ return self::convertUrlForOStatus($url);
}
/**
- * @brief Converts [url] BBCodes in a format that looks fine on Mastodon and GNU Social.
+ * @brief Converts [url] BBCodes in a format that looks fine on OStatus systems.
* @param string $url URL that is about to be reformatted
* @return string reformatted link including HTML codes
*/
- private static function convertUrlForMastodon($url)
+ private static function convertUrlForOStatus($url)
{
$parts = parse_url($url);
$scheme = $parts['scheme'] . '://';
$styled_url = str_replace($scheme, '', $url);
- $html = '<a href="%s" class="attachment" rel="nofollow noopener" target="_blank">' .
- '<span class="invisible">%s</span>';
-
if (strlen($styled_url) > 30) {
- $html .= '<span class="ellipsis">%s</span>' .
- '<span class="invisible">%s</span></a>';
-
- $ellipsis = substr($styled_url, 0, 30);
- $rest = substr($styled_url, 30);
- return sprintf($html, $url, $scheme, $ellipsis, $rest);
- } else {
- $html .= '%s</a>';
- return sprintf($html, $url, $scheme, $styled_url);
- }
- }
-
- /**
- * @brief Shortens [url] BBCodes in a format that looks less ugly than the full address. (callback function)
- * @param array $match Array with the matching values
- * @return string reformatted link including HTML codes
- */
- private static function shortenVisibleUrlCallback($match)
- {
- $url = $match[1];
-
- if (isset($match[2]) && ($match[1] != $match[2])) {
- return $match[0];
- }
-
- $parts = parse_url($url);
- if (!isset($parts['scheme'])) {
- return $match[0];
+ $styled_url = substr($styled_url, 0, 30) . "…";
}
- return self::shortenVisibleUrl($url);
- }
-
- /**
- * @brief Shortens [url] BBCodes in a format that looks less ugly than the full address.
- * @param string $url URL that is about to be reformatted
- * @return string reformatted link including HTML codes
- */
- private static function shortenVisibleUrl($url)
- {
- $parts = parse_url($url);
- $scheme = $parts['scheme'] . '://';
- $styled_url = str_replace($scheme, '', $url);
-
-// Currently deactivated, due to preview problems inside of Diaspora
-// if (strlen($styled_url) > 30) {
-// $styled_url = substr($styled_url, 0, 30) . "…";
-// }
-
$html = '<a href="%s" target="_blank">%s</a>';
return sprintf($html, $url, $styled_url);
$text = Cache::get($match[1]);
if (is_null($text)) {
- $a = get_app();
+ $a = self::getApp();
$stamp1 = microtime(true);
$doc = new DOMDocument();
@$doc->loadHTML($body);
- $xpath = new DomXPath($doc);
+ $xpath = new DOMXPath($doc);
$list = $xpath->query("//meta[@name]");
foreach ($list as $node) {
$attr = [];
$text = Cache::get($match[1]);
if (is_null($text)) {
- $a = get_app();
+ $a = self::getApp();
$stamp1 = microtime(true);
$doc = new DOMDocument();
@$doc->loadHTML($body);
- $xpath = new DomXPath($doc);
+ $xpath = new DOMXPath($doc);
$list = $xpath->query("//meta[@name]");
foreach ($list as $node) {
$attr = [];
*/
public static function convert($text, $try_oembed = true, $simple_html = false, $for_plaintext = false)
{
- $a = get_app();
+ $a = self::getApp();
/*
* preg_match_callback function to replace potential Oembed tags with Oembed content
$autolink_regex = "/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism";
$text = preg_replace($autolink_regex, '$1[url]$2[/url]', $text);
if ($simple_html == 7) {
- $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
- $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
- } else {
- $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::shortenVisibleUrlCallback', $text);
- $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::shortenVisibleUrlCallback', $text);
+ $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
+ $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
}
} else {
$text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $text);
$text = preg_replace_callback(
"(\[style=(.*?)\](.*?)\[\/style\])ism",
function ($match) {
- return "<span style=\"" . self::cleanCss($match[1]) . ";\">" . $match[2] . "</span>";
+ return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
},
$text
);
$text = preg_replace_callback(
"(\[class=(.*?)\](.*?)\[\/class\])ism",
function ($match) {
- return "<span class=\"" . self::cleanCss($match[1]) . "\">" . $match[2] . "</span>";
+ return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
},
$text
);
return $abstract;
}
+
+	/**
+	 * @brief Rewrites a single Friendica style mention as a Diaspora style mention
+	 *
+	 * Used as a preg_replace_callback() handler. The contact address is taken
+	 * from Contact::getDetailsByURL() and, when that yields no address, from
+	 * Probe::uri(). If neither lookup provides an address the matched text is
+	 * handed back untouched.
+	 *
+	 * @param array $match Matching values for the callback: [0] is the whole match,
+	 *                     [2] the text used as mention name, [3] the value that is looked up
+	 * @return string Mention in the form "@{name; address}" or the unchanged match
+	 */
+	private static function bbCodeMention2DiasporaCallback($match)
+	{
+		$profile = Contact::getDetailsByURL($match[3]);
+
+		if (empty($profile['addr'])) {
+			// Nothing usable from the first lookup, so fall back to probing
+			$profile = Probe::uri($match[3]);
+
+			if (empty($profile['addr'])) {
+				// No address available at all: leave the mention as it was
+				return $match[0];
+			}
+		}
+
+		return '@{' . $match[2] . '; ' . $profile['addr'] . '}';
+	}
+
+	/**
+	 * @brief Converts a BBCode text into Markdown
+	 *
+	 * This function converts a BBCode item body to be sent to Markdown-enabled
+	 * systems like Diaspora and Libertree.
+	 *
+	 * The steps are, in order: optional removal of share information, flattening
+	 * of hashtag links into plain "#tag" text, replacement of multi-line [code]
+	 * blocks by placeholders, BBCode to HTML conversion through self::convert(),
+	 * HTML to Markdown conversion, optional rewriting of mentions, and finally the
+	 * restoration of the code blocks. The 'bb2diaspora' hook is called with the
+	 * result before it is returned.
+	 *
+	 * @param string $text         BBCode text
+	 * @param bool   $for_diaspora Diaspora requires more changes than Libertree
+	 * @return string Markdown text without leading or trailing whitespace
+	 */
+	public static function toMarkdown($text, $for_diaspora = true)
+	{
+		$a = self::getApp();
+
+		// Kept untouched so that hashtags dropped by the conversion can be re-added later
+		$original_text = $text;
+
+		// Since Diaspora is creating a summary for links, this function removes them before posting
+		if ($for_diaspora) {
+			$text = self::removeShareInformation($text);
+		}
+
+		/**
+		 * Transform #tags, strip off the [url] and replace spaces with underscore
+		 */
+		$url_search_string = "^\[\]";
+		$text = preg_replace_callback("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/i",
+			function ($matches) {
+				return '#' . str_replace(' ', '_', $matches[2]);
+			},
+			$text
+		);
+
+		// Converting images with size parameters to simple images. Markdown doesn't know it.
+		$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
+
+		// Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
+		$codeblocks = [];
+
+		$text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
+			function ($matches) use (&$codeblocks) {
+				// Single-line code stays in place and is handled by self::convert()
+				$return = $matches[0];
+				if (strpos($matches[2], "\n") !== false) {
+					// The placeholder carries the index under which the fenced block is stored
+					$return = '#codeblock-' . count($codeblocks) . '#';
+
+					$prefix = '````' . $matches[1] . PHP_EOL;
+					$codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
+				}
+				return $return;
+			},
+			$text
+		);
+
+		// Convert it to HTML - don't try oembed
+		if ($for_diaspora) {
+			$text = self::convert($text, false, 3);
+
+			// Add all tags that maybe were removed
+			if (preg_match_all("/#\[url\=([$url_search_string]*)\](.*?)\[\/url\]/ism", $original_text, $tags)) {
+				// $text is not modified inside the loop, so it only has to be decoded once
+				$decoded_text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
+
+				$tagline = "";
+				foreach ($tags[2] as $tag) {
+					$tag = html_entity_decode($tag, ENT_QUOTES, 'UTF-8');
+					// Strict comparison: a tag found at offset 0 is present and must not be added again
+					if (strpos($decoded_text, '#' . $tag) === false) {
+						$tagline .= '#' . $tag . ' ';
+					}
+				}
+				$text = $text . " " . $tagline;
+			}
+		} else {
+			$text = self::convert($text, false, 4);
+		}
+
+		// mask some special HTML chars from conversion to markdown
+		$text = str_replace(['&lt;', '&gt;', '&amp;'], ['&_lt_;', '&_gt_;', '&_amp_;'], $text);
+
+		// If a link is followed by a quote then there should be a newline before it
+		// Maybe we should make this newline at every time before a quote.
+		$text = str_replace(["</a><blockquote>"], ["</a><br><blockquote>"], $text);
+
+		$stamp1 = microtime(true);
+
+		// Now convert HTML to Markdown
+		$converter = new HtmlConverter();
+		$text = $converter->convert($text);
+
+		// unmask the special chars back to HTML (the underscores of the masks come back escaped)
+		$text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['&lt;', '&gt;', '&amp;'], $text);
+
+		$a->save_timestamp($stamp1, "parser");
+
+		// Libertree has a problem with escaped hashtags.
+		$text = str_replace(['\#'], ['#'], $text);
+
+		// Remove any leading or trailing whitespace, as this will mess up
+		// the Diaspora signature verification and cause the item to disappear
+		$text = trim($text);
+
+		if ($for_diaspora) {
+			// A closure is used instead of ['self', ...], which is a deprecated callable form since PHP 8.2
+			$text = preg_replace_callback(
+				"/([@]\[(.*?)\])\(([$url_search_string]*?)\)/ism",
+				function ($match) {
+					return self::bbCodeMention2DiasporaCallback($match);
+				},
+				$text
+			);
+		}
+
+		// Restore code blocks
+		$text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+			function ($matches) use ($codeblocks) {
+				// Check and read with the same integer index; unknown placeholders are dropped
+				$index = intval($matches[1]);
+				return isset($codeblocks[$index]) ? $codeblocks[$index] : '';
+			},
+			$text
+		);
+
+		Addon::callHooks('bb2diaspora', $text);
+
+		return $text;
+	}
}