namespace Friendica\Content\Text;
use DOMDocument;
-use DomXPath;
+use DOMXPath;
use Exception;
use Friendica\BaseObject;
use Friendica\Content\OEmbed;
use Friendica\Core\Protocol;
use Friendica\Core\System;
use Friendica\Model\Contact;
+use Friendica\Model\Event;
use Friendica\Network\Probe;
use Friendica\Object\Image;
use Friendica\Util\Map;
use Friendica\Util\ParseUrl;
use League\HTMLToMarkdown\HtmlConverter;
-require_once "include/event.php";
-require_once "include/html2plain.php";
require_once "mod/proxy.php";
class BBCode extends BaseObject
$picturedata = Image::getInfoFromURL($matches[1]);
- if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
- $post["image"] = $matches[1];
- } else {
- $post["preview"] = $matches[1];
+ if ($picturedata) {
+ if (($picturedata[0] >= 500) && ($picturedata[0] >= $picturedata[1])) {
+ $post["image"] = $matches[1];
+ } else {
+ $post["preview"] = $matches[1];
+ }
}
}
$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
$URLSearchString = "^\[\]";
+
+ $body = preg_replace("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body);
+
if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
if ((count($pictures) == 1) && !$has_title) {
// Checking, if the link goes to a picture
$post["text"] = str_replace($pictures[0][0], "", $body);
} else {
$imgdata = Image::getInfoFromURL($pictures[0][1]);
- if (substr($imgdata["mime"], 0, 6) == "image/") {
+ if ($imgdata && substr($imgdata["mime"], 0, 6) == "image/") {
$post["type"] = "photo";
$post["image"] = $pictures[0][1];
$post["preview"] = $pictures[0][2];
}
/**
 * @brief Converts a BBCode text into plaintext
 *
 * Every bracketed tag ("[b]", "[/url]", "[url=...]", ...) is removed and the
 * text between the tags is kept as it is. A tag has to open and close on the
 * same line, because "." does not match line breaks in the pattern used here.
 *
 * @param string $text      BBCode text that is about to be stripped
 * @param bool   $keep_urls Whether to keep URLs in the resulting plaintext
 *
 * @return string
 */
public static function toPlaintext($text, $keep_urls = true)
{
    $naked_text = preg_replace('/\[(.+?)\]/', '', $text);
    if ($naked_text === null) {
        // preg_replace() yields null on a PCRE error (e.g. backtrack limit);
        // fall back to the input so the documented string return still holds
        $naked_text = $text;
    }

    if (!$keep_urls) {
        $without_urls = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
        if ($without_urls !== null) {
            $naked_text = $without_urls;
        }
    }

    return $naked_text;
}

/**
 * @brief Returns the proxied variant of an image URL where the output mode asks for it
 *
 * @param string   $image      URL of the image
 * @param bool|int $simplehtml Output mode of the calling conversion
 *
 * @return string Result of proxy_url() for the modes false and 2, otherwise the unchanged URL
 */
private static function proxyUrl($image, $simplehtml = false)
{
    // Only send proxied pictures to API and for internal display
    // NOTE(review): in_array() compares loosely here, so 0, null and '' are
    // treated like false as well - presumably intended, confirm against callers
    if (in_array($simplehtml, [false, 2])) {
        return proxy_url($image);
    }

    return $image;
}
public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false)
$return = '';
if ($simplehtml == 7) {
- $return = self::convertUrlForMastodon($data["url"]);
+ $return = self::convertUrlForOStatus($data["url"]);
} elseif (($simplehtml != 4) && ($simplehtml != 0)) {
$return = sprintf('<a href="%s" target="_blank">%s</a><br>', $data["url"], $data["title"]);
} else {
}
if ($data["image"] != "") {
- $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br />', $data["url"], proxy_url($data["image"]), $data["title"]);
+ $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br />', $data["url"], self::proxyUrl($data["image"], $simplehtml), $data["title"]);
} elseif ($data["preview"] != "") {
- $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br />', $data["url"], proxy_url($data["preview"]), $data["title"]);
+ $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br />', $data["url"], self::proxyUrl($data["preview"], $simplehtml), $data["title"]);
}
if (($data["type"] == "photo") && ($data["url"] != "") && ($data["image"] != "")) {
- $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data["url"], proxy_url($data["image"]), $data["title"]);
+ $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data["url"], self::proxyUrl($data["image"], $simplehtml), $data["title"]);
} else {
$return .= sprintf('<h4><a href="%s">%s</a></h4>', $data['url'], $data['title']);
}
if ($data["description"] != "" && $data["description"] != $data["title"]) {
- $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($data["description"])));
+ // Sanitize the HTML by converting it to BBCode
+ $bbcode = HTML::toBBCode($data["description"]);
+ $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
}
-
if ($data["type"] == "link") {
$return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['url'], parse_url($data['url'], PHP_URL_HOST));
}
if (($data["url"] != "") && ($data["title"] != "")) {
$text .= "\n[url=" . $data["url"] . "]" . $data["title"] . "[/url]";
} elseif (($data["url"] != "")) {
- $text .= "\n" . $data["url"];
+ $text .= "\n[url]" . $data["url"] . "[/url]";
}
return $text . "\n" . $data["after"];
}
- private static function cleanCss($input)
- {
- $cleaned = "";
-
- $input = strtolower($input);
-
- for ($i = 0; $i < strlen($input); $i++) {
- $char = substr($input, $i, 1);
-
- if (($char >= "a") && ($char <= "z")) {
- $cleaned .= $char;
- }
-
- if (!(strpos(" #;:0123456789-_.%", $char) === false)) {
- $cleaned .= $char;
- }
- }
-
- return $cleaned;
- }
-
/**
* Converts [url] BBCodes into a format that looks fine on OStatus systems. (callback function)
*
* @param array $match Array with the matching values
* @return string reformatted link including HTML codes
*/
- private static function convertUrlForMastodonCallback($match)
+ private static function convertUrlForOStatusCallback($match)
{
$url = $match[1];
return $match[0];
}
- return self::convertUrlForMastodon($url);
+ return self::convertUrlForOStatus($url);
}
/**
 * @brief Converts [url] BBCodes in a format that looks fine on OStatus systems.
 *
 * The link text is the URL without its leading scheme. When that text is
 * longer than 30 characters it is cut after 30 characters and an ellipsis is
 * appended. The href always carries the complete URL.
 *
 * @param string $url URL that is about to be reformatted
 * @return string reformatted link including HTML codes
 */
private static function convertUrlForOStatus($url)
{
    $parts = parse_url($url);

    // parse_url() returns false for seriously malformed URLs and leaves out
    // 'scheme' for scheme-less ones; show the URL unchanged in both cases
    $styled_url = $url;
    if (is_array($parts) && !empty($parts['scheme'])) {
        $scheme = $parts['scheme'] . '://';

        // Remove the scheme only where it leads the URL, so that a second URL
        // inside the path or query string stays intact
        if (strpos($url, $scheme) === 0) {
            $styled_url = substr($url, strlen($scheme));
        }
    }

    // Measure and cut by characters: a byte-wise cut could split a multi-byte
    // UTF-8 sequence and produce invalid output
    if (mb_strlen($styled_url, 'UTF-8') > 30) {
        $styled_url = mb_substr($styled_url, 0, 30, 'UTF-8') . "…";
    }

    $html = '<a href="%s" target="_blank">%s</a>';

    return sprintf($html, $url, $styled_url);
}
/*
// it loops over the array starting from the first element and going sequentially
// to the last element
$newbody = str_replace('[$#saved_image' . $cnt . '#$]',
- '<img src="' . proxy_url($image) . '" alt="' . L10n::t('Image/photo') . '" />', $newbody);
+ '<img src="' . self::proxyUrl($image) . '" alt="' . L10n::t('Image/photo') . '" />', $newbody);
$cnt++;
}
}
if (stripos(normalise_link($link), 'http://twitter.com/') === 0) {
+ $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
+ } else {
$text .= $headline . '<blockquote>' . trim($share[3]) . "</blockquote><br />";
if ($link != "") {
$text .= '<br /><a href="' . $link . '">[l]</a>';
}
- } else {
- $text .= '<br /><a href="' . $link . '">' . $link . '</a>';
}
break;
$doc = new DOMDocument();
@$doc->loadHTML($body);
- $xpath = new DomXPath($doc);
+ $xpath = new DOMXPath($doc);
$list = $xpath->query("//meta[@name]");
foreach ($list as $node) {
$attr = [];
/**
 * @brief Callback that highlights a code section when its language is supported
 *
 * @param array $match Array with the matching values: [1] is the language
 *                     name, [2] is the code
 * @return string Result of text_highlight() for a supported language, otherwise
 *                the code wrapped in a plain [code] element
 */
private static function textHighlightCallback($match)
{
    $supported = [
        'php', 'css', 'mysql', 'sql', 'abap', 'diff', 'html', 'perl', 'ruby',
        'vbscript', 'avrc', 'dtd', 'java', 'xml', 'cpp', 'python', 'javascript', 'js', 'sh', 'bash'
    ];

    // The language name is matched case-insensitively
    $language = strtolower($match[1]);

    if (in_array($language, $supported, true)) {
        return text_highlight($match[2], $language);
    }

    // Fallback in case the language doesn't exist
    return '[code]' . $match[2] . '[/code]';
}
/**
// After we're finished processing the bbcode we'll
// replace all of the event code with a reformatted version.
- $ev = bbtoevent($text);
+ $ev = Event::fromBBCode($text);
// Replace any html brackets with HTML Entities to prevent executing HTML or script
// Don't use strip_tags here because it breaks [url] search by replacing & with amp
$autolink_regex = "/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism";
$text = preg_replace($autolink_regex, '$1[url]$2[/url]', $text);
if ($simple_html == 7) {
- $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
- $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForMastodonCallback', $text);
+ $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
+ $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
}
} else {
$text = preg_replace("(\[url\]([$URLSearchString]*)\[\/url\])ism", " $1 ", $text);
if (strpos($text, '[/map]') !== false) {
$text = preg_replace_callback(
"/\[map\](.*?)\[\/map\]/ism",
- function ($match) {
- // the extra space in the following line is intentional
- // Whyyy? - @MrPetovan
- return str_replace($match[0], '<div class="map" >' . Map::byLocation($match[1]) . '</div>', $match[0]);
+ function ($match) use ($simple_html) {
+ return str_replace($match[0], '<p class="map">' . Map::byLocation($match[1], $simple_html) . '</p>', $match[0]);
},
$text
);
if (strpos($text, '[map=') !== false) {
$text = preg_replace_callback(
"/\[map=(.*?)\]/ism",
- function ($match) {
- // the extra space in the following line is intentional
- // Whyyy? - @MrPetovan
- return str_replace($match[0], '<div class="map" >' . Map::byCoordinates(str_replace('/', ' ', $match[1])) . '</div>', $match[0]);
+ function ($match) use ($simple_html) {
+ return str_replace($match[0], '<p class="map">' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '</p>', $match[0]);
},
$text
);
}
if (strpos($text, '[map]') !== false) {
- $text = preg_replace("/\[map\]/", '<div class="map"></div>', $text);
+ $text = preg_replace("/\[map\]/", '<p class="map"></p>', $text);
}
// Check for headers
$text = preg_replace("(\[u\](.*?)\[\/u\])ism", '<u>$1</u>', $text);
// Check for strike-through text
- $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<strike>$1</strike>', $text);
+ $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<s>$1</s>', $text);
// Check for over-line text
$text = preg_replace("(\[o\](.*?)\[\/o\])ism", '<span class="overline">$1</span>', $text);
$text = preg_replace_callback(
"(\[style=(.*?)\](.*?)\[\/style\])ism",
function ($match) {
- return "<span style=\"" . self::cleanCss($match[1]) . ";\">" . $match[2] . "</span>";
+ return "<span style=\"" . HTML::sanitizeCSS($match[1]) . ";\">" . $match[2] . "</span>";
},
$text
);
$text = preg_replace_callback(
"(\[class=(.*?)\](.*?)\[\/class\])ism",
function ($match) {
- return "<span class=\"" . self::cleanCss($match[1]) . "\">" . $match[2] . "</span>";
+ return "<span class=\"" . HTML::sanitizeCSS($match[1]) . "\">" . $match[2] . "</span>";
},
$text
);
// [img=widthxheight]image source[/img]
$text = preg_replace_callback(
"/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism",
- function ($matches) {
+ function ($matches) use ($simple_html) {
if (strpos($matches[3], "data:image/") === 0) {
return $matches[0];
}
- $matches[3] = proxy_url($matches[3]);
+ $matches[3] = self::proxyUrl($matches[3], $simple_html);
return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]";
},
$text
$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '<img src="$3" style="width: $1px;" >', $text);
$text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '<img class="zrl" src="$3" style="width: $1px;" >', $text);
+ $text = preg_replace_callback("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism",
+ function ($matches) use ($simple_html) {
+ $matches[1] = self::proxyUrl($matches[1], $simple_html);
+ $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT);
+ return '<img src="' . $matches[1] . '" alt="' . $matches[2] . '">';
+ },
+ $text);
+
// Images
// [img]pathtoimage[/img]
$text = preg_replace_callback(
"/\[img\](.*?)\[\/img\]/ism",
- function ($matches) {
+ function ($matches) use ($simple_html) {
if (strpos($matches[1], "data:image/") === 0) {
return $matches[0];
}
- $matches[1] = proxy_url($matches[1]);
+ $matches[1] = self::proxyUrl($matches[1], $simple_html);
return "[img]" . $matches[1] . "[/img]";
},
$text
// Try to Oembed
if ($try_oembed) {
- $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
- $text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $text);
+ $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '<video src="$1" controls="controls" width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
+ $text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3).*?)\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $text);
$text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text);
$text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text);
} else {
- $text = preg_replace("/\[video\](.*?)\[\/video\]/",
+ $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
'<a href="$1" target="_blank">$1</a>', $text);
- $text = preg_replace("/\[audio\](.*?)\[\/audio\]/",
+ $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism",
'<a href="$1" target="_blank">$1</a>', $text);
}
// start which is always required). Allow desc with a missing summary for compatibility.
if ((x($ev, 'desc') || x($ev, 'summary')) && x($ev, 'start')) {
- $sub = format_event_html($ev, $simple_html);
+ $sub = Event::getHTML($ev, $simple_html);
$text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text);
$text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text);
$text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text);
$text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text);
-
+ /// @todo What is the meaning of these lines?
$text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text);
$text = preg_replace('/\&\#039\;/', '\'', $text);
- $text = preg_replace('/\"\;/', '"', $text);
+
+ // Currently deactivated, it made problems with " inside of alt texts.
+ //$text = preg_replace('/\"\;/', '"', $text);
// fix any escaped ampersands that may have been converted into links
$text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text);
* @param string $addon The addon for which the abstract is meant for
* @return string The abstract
*/
- private static function getAbstract($text, $addon = "")
+ public static function getAbstract($text, $addon = "")
{
$abstract = "";
$abstracts = [];