X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FContent%2FText%2FBBCode.php;h=e304f47637eeaf1b0b2d9cb10a0734d35340685c;hb=69a73678ebc18ad27b27f1bf92bc9433b7e7066f;hp=3054170fc05065fd0c17c1e7cc36843c6a0d3129;hpb=04fb88585783f4883b40ec9d49a4a7fa644fb294;p=friendica.git diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 3054170fc0..e304f47637 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -15,7 +15,9 @@ use Friendica\Core\Addon; use Friendica\Core\Cache; use Friendica\Core\Config; use Friendica\Core\L10n; +use Friendica\Core\Logger; use Friendica\Core\Protocol; +use Friendica\Core\Renderer; use Friendica\Core\System; use Friendica\Model\Contact; use Friendica\Model\Event; @@ -25,7 +27,8 @@ use Friendica\Util\Map; use Friendica\Util\Network; use Friendica\Util\ParseUrl; use Friendica\Util\Proxy as ProxyUtils; -use League\HTMLToMarkdown\HtmlConverter; +use Friendica\Util\Strings; +use Friendica\Util\XML; class BBCode extends BaseObject { @@ -127,12 +130,12 @@ class BBCode extends BaseObject $type = ""; preg_match("/type='(.*?)'/ism", $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $type = strtolower($matches[1]); } preg_match('/type="(.*?)"/ism', $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $type = strtolower($matches[1]); } @@ -150,12 +153,12 @@ class BBCode extends BaseObject $url = ""; preg_match("/url='(.*?)'/ism", $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $url = $matches[1]; } preg_match('/url="(.*?)"/ism', $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $url = $matches[1]; } @@ -165,12 +168,12 @@ class BBCode extends BaseObject $title = ""; preg_match("/title='(.*?)'/ism", $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $title = $matches[1]; } preg_match('/title="(.*?)"/ism', $attributes, $matches); - if (x($matches, 1)) { + if 
(!empty($matches[1])) { $title = $matches[1]; } @@ -183,12 +186,12 @@ class BBCode extends BaseObject $image = ""; preg_match("/image='(.*?)'/ism", $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $image = $matches[1]; } preg_match('/image="(.*?)"/ism', $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $image = $matches[1]; } @@ -198,12 +201,12 @@ class BBCode extends BaseObject $preview = ""; preg_match("/preview='(.*?)'/ism", $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $preview = $matches[1]; } preg_match('/preview="(.*?)"/ism', $attributes, $matches); - if (x($matches, 1)) { + if (!empty($matches[1])) { $preview = $matches[1]; } @@ -231,7 +234,7 @@ class BBCode extends BaseObject */ $has_title = !empty($item['title']); - $plink = (!empty($item['plink']) ? $item['plink'] : ''); + $plink = defaults($item, 'plink', ''); $post = self::getAttachmentData($body); // if nothing is found, it maybe having an image. @@ -348,7 +351,7 @@ class BBCode extends BaseObject */ public static function toPlaintext($text, $keep_urls = true) { - $naked_text = preg_replace('/\[(.+?)\]/','', $text); + $naked_text = preg_replace('/\[(.+?)\]\s*/','', $text); if (!$keep_urls) { $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text); } @@ -380,7 +383,7 @@ class BBCode extends BaseObject $c = preg_match_all('/\[img.*?\](.*?)\[\/img\]/ism', $s, $matches, PREG_SET_ORDER); if ($c) { foreach ($matches as $mtch) { - logger('scale_external_image: ' . $mtch[1]); + Logger::log('scale_external_image: ' . $mtch[1]); $hostname = str_replace('www.', '', substr(System::baseUrl(), strpos(System::baseUrl(), '://') + 3)); if (stristr($mtch[1], $hostname)) { @@ -415,7 +418,7 @@ class BBCode extends BaseObject $Image->scaleDown(640); $new_width = $Image->getWidth(); $new_height = $Image->getHeight(); - logger('scale_external_images: ' . $orig_width . '->' . $new_width . 'w ' . $orig_height . '->' . 
$new_height . 'h' . ' match: ' . $mtch[0], LOGGER_DEBUG); + Logger::log('scale_external_images: ' . $orig_width . '->' . $new_width . 'w ' . $orig_height . '->' . $new_height . 'h' . ' match: ' . $mtch[0], Logger::DEBUG); $s = str_replace( $mtch[0], '[img=' . $new_width . 'x' . $new_height. ']' . $scaled . '[/img]' @@ -424,7 +427,7 @@ class BBCode extends BaseObject : ''), $s ); - logger('scale_external_images: new string: ' . $s, LOGGER_DEBUG); + Logger::log('scale_external_images: new string: ' . $s, Logger::DEBUG); } } } @@ -452,7 +455,7 @@ class BBCode extends BaseObject // than the maximum, then don't waste time looking for the images if ($maxlen && (strlen($body) > $maxlen)) { - logger('the total body length exceeds the limit', LOGGER_DEBUG); + Logger::log('the total body length exceeds the limit', Logger::DEBUG); $orig_body = $body; $new_body = ''; @@ -472,7 +475,7 @@ class BBCode extends BaseObject if (($textlen + $img_start) > $maxlen) { if ($textlen < $maxlen) { - logger('the limit happens before an embedded image', LOGGER_DEBUG); + Logger::log('the limit happens before an embedded image', Logger::DEBUG); $new_body = $new_body . substr($orig_body, 0, $maxlen - $textlen); $textlen = $maxlen; } @@ -486,7 +489,7 @@ class BBCode extends BaseObject if (($textlen + $img_end) > $maxlen) { if ($textlen < $maxlen) { - logger('the limit happens before the end of a non-embedded image', LOGGER_DEBUG); + Logger::log('the limit happens before the end of a non-embedded image', Logger::DEBUG); $new_body = $new_body . substr($orig_body, 0, $maxlen - $textlen); $textlen = $maxlen; } @@ -509,11 +512,11 @@ class BBCode extends BaseObject if (($textlen + strlen($orig_body)) > $maxlen) { if ($textlen < $maxlen) { - logger('the limit happens after the end of the last image', LOGGER_DEBUG); + Logger::log('the limit happens after the end of the last image', Logger::DEBUG); $new_body = $new_body . 
substr($orig_body, 0, $maxlen - $textlen); } } else { - logger('the text size with embedded images extracted did not violate the limit', LOGGER_DEBUG); + Logger::log('the text size with embedded images extracted did not violate the limit', Logger::DEBUG); $new_body = $new_body . $orig_body; } @@ -572,17 +575,17 @@ class BBCode extends BaseObject $return = sprintf('
' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $mention . ':
' . "\n" . '«' . $content . '»'; break; case 2: - $text = $preshare . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ":' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':
' . "\n" . $content; break; case 3: // Diaspora - $headline = '' . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . $userid . ':' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . $mention . ':
' . "\n"; - if ($text != "") { - $text .= "' . trim($share[3]) . "
' . trim($content) . '' . "\n"; - if ($link != "") { - $text .= '
' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8'); + $headline .= L10n::t('%2$s %3$s', $attributes['link'], $mention, $attributes['posted']); + $headline .= ':
' . "\n"; - $text .= $headline . '' . trim($share[3]) . "
' . trim($content) . '' . "\n"; break; case 5: - $text = $preshare . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ":
' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':
' . "\n" . $content; break; case 7: // statusnet/GNU Social - $text = $preshare . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . " @" . $userid_compact . ": " . $share[3]; - break; - case 8: // twitter - $text = $preshare . "RT @" . $userid_compact . ": " . $share[3]; + $text = ($is_quote_share? '' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' @' . $author_contact['addr'] . ': ' . $content . '
' . "\n"; break; case 9: // Google+ - $text = $preshare . html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ":' . html_entity_decode('♲ ', ENT_QUOTES, 'UTF-8') . ' ' . $author_contact['addr'] . ':
' . "\n"; + $text .= '' . $content . '
' . "\n"; - if ($link != "") { - $text .= "' . $attributes['link'] . '
'; } break; default: // Transforms quoted tweets in rich attachments to avoid nested tweets - if (stripos(normalise_link($link), 'http://twitter.com/') === 0 && OEmbed::isAllowedURL($link)) { + if (stripos(Strings::normaliseLink($attributes['link']), 'http://twitter.com/') === 0 && OEmbed::isAllowedURL($attributes['link'])) { try { - $oembed = OEmbed::getHTML($link, $preshare); + $text = ($is_quote_share? '' . trim($matches[2], "\n\r") . '
';
+ }
+ return $return;
+ },
+ $text
+ );
+
// Hide all [noparse] contained bbtags by spacefying them
// POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image?
@@ -1265,19 +1222,11 @@ class BBCode extends BaseObject
$text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $text);
$text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $text);
- $text = preg_replace("/\n\[code\]/ism", "[code]", $text);
- $text = preg_replace("/\[\/code\]\n/ism", "[/code]", $text);
-
// when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems
if (!$try_oembed) {
$text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text);
}
- // Check for [code] text here, before the linefeeds are messed with.
- // The highlighter will unescape and re-escape the content.
- if (strpos($text, '[code=') !== false) {
- $text = preg_replace_callback("/\[code=(.*?)\](.*?)\[\/code\]/ism", 'self::textHighlightCallback', $text);
- }
// Convert new line chars to html ([^<]*)
(?! for Diaspora inline code blocks
- if ($simple_html === 3) {
- $return = '' . $match[1] . '
';
- }
- return $return;
- }
- , $text);
-
// Unhide all [noparse] contained bbtags unspacefying them
// and triming the [noparse] tag.
@@ -1771,6 +1716,18 @@ class BBCode extends BaseObject
$text = self::interpolateSavedImagesIntoItemBody($text, $saved_image);
}
+ // Restore code blocks
+ $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
+ function ($matches) use ($codeblocks) {
+ $return = $matches[0];
+ if (isset($codeblocks[intval($matches[1])])) {
+ $return = $codeblocks[$matches[1]];
+ }
+ return $return;
+ },
+ $text
+ );
+
// Clean up the HTML by loading and saving the HTML with the DOM.
// Bad structured html can break a whole page.
// For performance reasons do it only with ativated item cache or at export.
@@ -1905,23 +1862,6 @@ class BBCode extends BaseObject
// Converting images with size parameters to simple images. Markdown doesn't know it.
$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $text);
- // Extracting multi-line code blocks before the whitespace processing/code highlighter in self::convert()
- $codeblocks = [];
-
- $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#is",
- function ($matches) use (&$codeblocks) {
- $return = $matches[0];
- if (strpos($matches[2], "\n") !== false) {
- $return = '#codeblock-' . count($codeblocks) . '#';
-
- $prefix = '````' . $matches[1] . PHP_EOL;
- $codeblocks[] = $prefix . trim($matches[2]) . PHP_EOL . '````';
- }
- return $return;
- },
- $text
- );
-
// Convert it to HTML - don't try oembed
if ($for_diaspora) {
$text = self::convert($text, false, 3);
@@ -1951,13 +1891,12 @@ class BBCode extends BaseObject
$stamp1 = microtime(true);
// Now convert HTML to Markdown
- $converter = new HtmlConverter();
- $text = $converter->convert($text);
+ $text = HTML::toMarkdown($text);
// unmask the special chars back to HTML
$text = str_replace(['&\_lt\_;', '&\_gt\_;', '&\_amp\_;'], ['<', '>', '&'], $text);
- $a->save_timestamp($stamp1, "parser");
+ $a->saveTimestamp($stamp1, "parser");
// Libertree has a problem with escaped hashtags.
$text = str_replace(['\#'], ['#'], $text);
@@ -1975,20 +1914,82 @@ class BBCode extends BaseObject
);
}
- // Restore code blocks
- $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU',
- function ($matches) use ($codeblocks) {
- $return = '';
- if (isset($codeblocks[intval($matches[1])])) {
- $return = $codeblocks[$matches[1]];
- }
- return $return;
- },
- $text
- );
-
Addon::callHooks('bb2diaspora', $text);
return $text;
}
+
+	/**
+	 * @brief Pull out all #hashtags and @person tags from $string.
+	 *
+	 * We also get @person@domain.com - which would make
+	 * the regex quite complicated as tags can also
+	 * end a sentence. So we'll run through our results
+	 * and strip the period from any tags which end with one.
+	 * Returns array of tags found, or empty array.
+	 *
+	 * Two passes are made over the text: the first collects "@word word"
+	 * candidates (possible full names), the second collects single word
+	 * tokens starting with "!", "#" or "@". A mention that is followed by
+	 * another word can therefore be returned in both forms.
+	 *
+	 * Content of [code] blocks - with or without a language attribute such
+	 * as [code=php] - and everything inside any other bbtag is ignored.
+	 *
+	 * @param string $string Post content
+	 *
+	 * @return array List of tag and person names
+	 */
+	public static function getTags($string)
+	{
+		$ret = [];
+
+		// Convert hashtag links to hashtags
+		$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);
+
+		// Ignore anything in a code block. The opening tag may carry a language
+		// attribute ([code=php]); without matching it, only the tags themselves
+		// would be removed below and the code content would be scanned for tags.
+		$string = preg_replace('/\[code(?:=[^\]]*)?\](.*?)\[\/code\]/ism', '', $string);
+
+		// Force line feeds at bbtags
+		$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);
+
+		// ignore anything in a bbtag
+		$string = preg_replace('/\[(.*?)\]/sm', '', $string);
+
+		// Match full names against @tags including the space between first and last
+		// We will look these up afterward to see if they are full names or not recognisable.
+
+		if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) {
+			foreach ($matches[1] as $match) {
+				if (strpos($match, ']') !== false) {
+					// we might be inside a bbcode color tag - leave it alone
+					continue;
+				}
+
+				// Strip a single sentence-ending period
+				if (substr($match, -1, 1) === '.') {
+					$ret[] = substr($match, 0, -1);
+				} else {
+					$ret[] = $match;
+				}
+			}
+		}
+
+		// Otherwise pull out single word tags. These can be @nickname, @first_last
+		// and #hash tags.
+
+		if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) {
+			foreach ($matches[1] as $match) {
+				if (strpos($match, ']') !== false) {
+					// we might be inside a bbcode color tag - leave it alone
+					continue;
+				}
+
+				// Strip a single sentence-ending period
+				if (substr($match, -1, 1) === '.') {
+					$match = substr($match, 0, -1);
+				}
+
+				// ignore strictly numeric tags like #1
+				if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
+					continue;
+				}
+
+				// try not to catch url fragments: skip the tag when the character right
+				// before its first occurrence looks like part of a URL. A position of 0
+				// (or no occurrence at all) means there is no preceding character to test.
+				// NOTE(review): the range "A-z" also covers [ \ ] ^ _ and the backtick;
+				// presumably "A-Z" was intended - confirm before changing, since it
+				// alters which tags are returned.
+				$position = strpos($string, $match);
+				if ($position && preg_match('/[a-zA-z0-9\/]/', substr($string, $position - 1, 1))) {
+					continue;
+				}
+
+				$ret[] = $match;
+			}
+		}
+
+		return $ret;
+	}
}