3 * @file include/text.php
7 use Friendica\Content\ContactSelector;
8 use Friendica\Content\Feature;
9 use Friendica\Content\Smilies;
10 use Friendica\Content\Text\BBCode;
11 use Friendica\Core\Addon;
12 use Friendica\Core\Config;
13 use Friendica\Core\L10n;
14 use Friendica\Core\PConfig;
15 use Friendica\Core\Protocol;
16 use Friendica\Core\System;
17 use Friendica\Database\DBA;
18 use Friendica\Model\Contact;
19 use Friendica\Model\Event;
20 use Friendica\Model\Item;
21 use Friendica\Render\FriendicaSmarty;
22 use Friendica\Util\DateTimeFormat;
23 use Friendica\Util\Map;
24 use Friendica\Util\Proxy as ProxyUtils;
26 use Friendica\Core\Logger;
27 use Friendica\Core\Renderer;
28 use Friendica\Model\FileTag;
29 use Friendica\Util\XML;
30 use Friendica\Content\Text\HTML;
32 require_once "include/conversation.php";
/**
 * @brief Generates a pseudo-random string of hexadecimal characters
 *
 * The randomness comes from random_bytes(). Each byte yields two hexadecimal
 * digits, so enough bytes are requested to cover $size and a surplus digit is
 * cut off again.
 *
 * @param int $size Number of hexadecimal characters wanted
 * @return string Lowercase hexadecimal string of exactly $size characters,
 *                or an empty string when $size is not positive
 */
function random_string($size = 64)
{
	$size = (int) $size;

	// random_bytes() rejects a length below 1, so an empty request is answered directly.
	if ($size <= 0) {
		return '';
	}

	// Two hex digits per byte: round the byte count up for odd sizes (integer math, no float).
	$byte_size = intdiv($size + 1, 2);

	return substr(bin2hex(random_bytes($byte_size)), 0, $size);
}
/**
 * Primary input filter: replaces angle brackets with square brackets.
 *
 * Use this on any text input where angle characters are not valid or permitted.
 * The square brackets that take their place may be filtered further by the
 * caller if they are not allowed either.
 *
 * An additional "high bit" filter (bytes \xBA, \xBC, \xBE) used to be applied as
 * well. It targeted an XSS vector in an old browser that stripped the high bit
 * of 8-bit characters after cleansing. It is no longer applied because it
 * interfered with legitimate unicode sequences.
 *
 * @param string $string Input string
 * @return string Filtered string
 */
function notags($string)
{
	$angle_chars = ["<", ">"];
	$safe_chars = ['[', ']'];

	return str_replace($angle_chars, $safe_chars, $string);
}
/**
 * Use this on "body" or "content" input where angle characters shouldn't be
 * removed, so that they can be displayed safely.
 *
 * Special characters are turned into HTML entities (double quotes included,
 * single quotes left alone). Entities already present in the input are kept
 * as they are instead of being encoded a second time.
 *
 * @param string $string Text to escape
 * @return string Escaped text
 */
function escape_tags($string)
{
	// false: do not re-encode entities that already exist in $string.
	$double_encode = false;

	return htmlspecialchars($string, ENT_COMPAT, 'UTF-8', $double_encode);
}
/**
 * Generate a string that is random, but usually pronounceable.
 * Used to generate initial passwords.
 *
 * Vowel groups and consonant groups are picked alternately and glued together,
 * then the result is cut to $len characters. A word that would end in a letter
 * cluster that is awkward at the end of a word is discarded and generated anew.
 *
 * Because the result serves as a password, every random pick comes from
 * random_int() (a CSPRNG) instead of the predictable Mersenne Twister.
 *
 * @param int $len Wanted length of the word
 * @return string Lowercase word of exactly $len characters, '' if $len is not positive
 */
function autoname($len)
{
	if ($len <= 0) {
		return '';
	}

	$vowels = ['a','a','ai','au','e','e','e','ee','ea','i','ie','o','ou','u'];
	// Occasionally let 'y' act as a vowel.
	if (random_int(0, 5) == 4) {
		$vowels[] = 'y';
	}

	// Consonant groups that may start a syllable.
	$cons = [
		'b','bl','br',
		'c','ch','cl','cr',
		'd','dr',
		'f','fl','fr',
		'g','gh','gl','gr',
		'h',
		'j',
		'k','kh','kl','kr',
		'l',
		'm',
		'n',
		'p','ph','pl','pr',
		'qu',
		'r','rh',
		's','sc','sh','sm','sp','st',
		't','th','tr',
		'v',
		'w','wh',
		'x',
		'z','zh',
	];

	// Consonant groups that only work in the middle of a word.
	$midcons = ['ck','ct','gn','ld','lf','lm','lt','mb','mm', 'mn','mp',
				'nd','ng','nk','nt','rn','rp','rt'];

	// Clusters a word must not end with.
	$noend = ['bl', 'br', 'cl','cr','dr','fl','fr','gl','gr',
				'kh', 'kl','kr','mn','pl','pr','rh','tr','qu','wh','q'];

	// One time in three the word starts with a vowel.
	$use_vowels = (random_int(0, 2) == 0);
	$table = $use_vowels ? $vowels : $cons;

	$word = '';
	for ($x = 0; $x < $len; $x++) {
		$word .= $table[random_int(0, count($table) - 1)];

		// Alternate: after a vowel any consonant group is allowed, after a consonant a vowel follows.
		$use_vowels = !$use_vowels;
		$table = $use_vowels ? $vowels : array_merge($cons, $midcons);
	}

	// Every pick adds at least one character, so the word is never shorter than $len here.
	$word = substr($word, 0, $len);

	foreach ($noend as $noe) {
		$noelen = strlen($noe);
		if ((strlen($word) > $noelen) && (substr($word, -$noelen) == $noe)) {
			// Bad ending: start over with a fresh word.
			return autoname($len);
		}
	}

	return $word;
}
/**
 * Turn user/group ACLs stored as angle bracketed text into arrays
 *
 * Example: "<1><2><3>" becomes [1, 2, 3]. Elements that do not evaluate to a
 * non-zero integer (such as the empty piece after the last bracket) are left out.
 *
 * @param string $s ACL string of angle-bracketed elements
 * @return array List of integer ids
 */
function expand_acl($s)
{
	if (!strlen($s)) {
		return [];
	}

	// Drop the opening brackets, then use the closing ones as separators.
	$pieces = explode('>', str_replace('<', '', $s));

	// array_filter() without a callback removes every 0; array_values() renumbers the list.
	return array_values(array_filter(array_map('intval', $pieces)));
}
192 * Wrap ACL elements in angle brackets for storage
193 * @param string $item
// Rewrites one ACL element in place: $item is received by reference and is
// overwritten with its bracketed form.
195 function sanitise_acl(&$item) {
// Surrounding whitespace is trimmed, notags() swaps angle brackets for square
// ones, and intval() then reduces the value to an integer that is wrapped as "<N>".
// NOTE(review): this listing omits the lines around this statement, so a
// condition guarding the assignment cannot be ruled out from here — verify.
197 $item = '<' . intval(notags(trim($item))) . '>';
/**
 * Convert an ACL array to a storable string
 *
 * Normally ACL permissions will be an array.
 * We'll also allow a comma-separated string.
 *
 * Every element is passed through sanitise_acl() and the results are
 * concatenated without a separator.
 *
 * @param string|array $p List of ACL elements or comma-separated string
 * @return string Concatenated ACL string, '' if no list could be built
 */
function perms2str($p)
{
	// Accept either a ready-made list or a comma-separated string.
	$elements = is_array($p) ? $p : explode(',', $p);

	if (!is_array($elements)) {
		return '';
	}

	// sanitise_acl() takes its argument by reference and rewrites each element in place.
	array_walk($elements, 'sanitise_acl');

	return implode('', $elements);
}
/**
 * Whole-word lookup inside a space-separated attribute value.
 *
 * For html/xml parsing: given an attribute such as
 * foobar="class1 class2 class3", find out whether it contains 'class3'.
 * A plain substring search is not enough because it would also match
 * 'notclass3', and a regular expression for the job is possible but more
 * complicated than needed.
 *
 * @param string $attr attribute value
 * @param string $s string to search
 * @return boolean True if found, False otherwise
 */
function attribute_contains($attr, $s)
{
	$tokens = explode(' ', $attr);

	if (!count($tokens)) {
		return false;
	}

	return in_array($s, $tokens);
}
/**
 * Compare activity uri. Knows about activity namespace.
 *
 * Two values match when they are identical, or when $haystack is the last path
 * component of $needle and $needle contains NAMESPACE_ACTIVITY_SCHEMA.
 *
 * @param string $haystack Activity to test
 * @param string $needle Activity uri to compare with
 * @return boolean True on a match
 */
function activity_match($haystack, $needle)
{
	if ($haystack === $needle) {
		return true;
	}

	// The short form must equal the last path component of the full uri.
	if (basename($needle) !== $haystack) {
		return false;
	}

	// ... and the full uri has to live in the activity schema namespace.
	return (bool) strstr($needle, NAMESPACE_ACTIVITY_SCHEMA);
}
/**
 * @brief Pull out all #hashtags and @person tags from $string.
 *
 * We also get @person@domain.com - which would make
 * the regex quite complicated as tags can also
 * end a sentence. So we'll run through our results
 * and strip the period from any tags which end with one.
 * Returns array of tags found, or empty array.
 *
 * The same name can show up twice: once from the "full name" pass and once
 * from the single word pass.
 *
 * @param string $string Post content
 * @return array List of tag and person names
 */
function get_tags($string)
{
	$found = [];

	// Convert hashtag links to hashtags
	$string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2', $string);

	// ignore anything in a code block
	$string = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $string);

	// Force line feeds at bbtags
	$string = str_replace(['[', ']'], ["\n[", "]\n"], $string);

	// ignore anything in a bbtag
	$string = preg_replace('/\[(.*?)\]/sm', '', $string);

	// First pass: @tags followed by a second word, i.e. possible "first last" names.
	// Whether they really are full names is looked up later by the caller.
	$full_name_pattern = '/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/';
	if (preg_match_all($full_name_pattern, $string, $matches)) {
		foreach ($matches[1] as $candidate) {
			if (strstr($candidate, ']')) {
				// we might be inside a bbcode color tag - leave it alone
				continue;
			}

			// A trailing period belongs to the sentence, not to the tag.
			$found[] = (substr($candidate, -1, 1) === '.') ? substr($candidate, 0, -1) : $candidate;
		}
	}

	// Second pass: single word tags (@nickname, @first_last, #hashtag, !group).
	$single_word_pattern = '/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/';
	if (preg_match_all($single_word_pattern, $string, $matches)) {
		foreach ($matches[1] as $candidate) {
			if (strstr($candidate, ']')) {
				// we might be inside a bbcode color tag - leave it alone
				continue;
			}

			if (substr($candidate, -1, 1) === '.') {
				$candidate = substr($candidate, 0, -1);
			}

			// ignore strictly numeric tags like #1
			$is_numeric_hashtag = (strpos($candidate, '#') === 0) && ctype_digit(substr($candidate, 1));
			if ($is_numeric_hashtag) {
				continue;
			}

			// try not to catch url fragments: a tag glued to a preceding
			// alphanumeric character or slash is taken as part of a url.
			// A tag at offset 0 has no preceding character and is always kept.
			// NOTE: the range "A-z" also covers the characters [ \ ] ^ _ `
			$position = strpos($string, $candidate);
			if ($position && preg_match('/[a-zA-z0-9\/]/', substr($string, $position - 1, 1))) {
				continue;
			}

			$found[] = $candidate;
		}
	}

	return $found;
}
331 * quick and dirty quoted_printable encoding
337 return str_replace("%", "=", rawurlencode($s));
/**
 * @brief Check for a valid email string
 *
 * The check is purely syntactic: a local part made of letters, digits, "_",
 * "-" and "+" (dot-separated), an "@", and a host name with at least one dot.
 *
 * @param string $email_address
 * @return int|false Result of preg_match(): 1 if the address matches, 0 if not
 */
function valid_email($email_address)
{
	$pattern = '/^[_a-zA-Z0-9\-\+]+(\.[_a-zA-Z0-9\-\+]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$/';

	return preg_match($pattern, $email_address);
}
/**
 * Reduce a URL to a form that can be compared with other URLs.
 *
 * "https:" becomes "http:", a leading "www." after "//" is dropped and
 * trailing slashes are removed.
 *
 * @param string $url URL to normalise
 * @return string Normalised URL
 */
function normalise_link($url)
{
	$replacements = [
		'https:' => 'http:',
		'//www.' => '//',
	];

	$ret = str_replace(array_keys($replacements), array_values($replacements), $url);

	return rtrim($ret, '/');
}
/**
 * Compare two URLs to see if they are the same, but ignore
 * slight but hopefully insignificant differences such as if one
 * is https and the other isn't, or if one is www.something and
 * the other isn't - and also ignore case differences.
 *
 * @param string $a first url
 * @param string $b second url
 * @return boolean True if the URLs match, otherwise False
 */
function link_compare($a, $b)
{
	$left = normalise_link($a);
	$right = normalise_link($b);

	// strcasecmp() yields 0 only when both strings are equal apart from case.
	return strcasecmp($left, $right) === 0;
}
/**
 * @brief Find any non-embedded images in private items and add redir links to them
 *
 * Every [img] tag in the item body that points at a ".../photo/<hex id>" URL is
 * rewritten to go through "redir", provided the item belongs to the local
 * user, is private, was not posted by the contact in $a->contact and arrived
 * over DFRN. Image URLs that already contain "/redir" are left alone.
 *
 * @param object $a Application object; only $a->contact['id'] is read
 * @param array &$item The field array of an item row
 */
function redir_private_images($a, &$item)
{
	$matches = [];
	$pattern = '|\[img\](http[^\[]*?/photo/[a-fA-F0-9]+?(-[0-9]\.[\w]+?)?)\[\/img\]|';

	if (!preg_match_all($pattern, $item['body'], $matches, PREG_SET_ORDER)) {
		return;
	}

	foreach ($matches as $mtch) {
		$img_tag = $mtch[0];
		$img_src = $mtch[1];

		// Already routed through redir.
		if (strpos($img_src, '/redir') !== false) {
			continue;
		}

		$needs_redir = (local_user() == $item['uid'])
			&& ($item['private'] == 1)
			&& ($item['contact-id'] != $a->contact['id'])
			&& ($item['network'] == Protocol::DFRN);

		if (!$needs_redir) {
			continue;
		}

		$img_url = 'redir?f=1&quiet=1&url=' . urlencode($img_src) . '&conurl=' . urlencode($item['author-link']);
		$item['body'] = str_replace($img_tag, '[img]' . $img_url . '[/img]', $item['body']);
	}
}
404 * @brief Given a text string, convert from bbcode to html and add smilie icons.
406 * @param string $text String with bbcode.
407 * @return string Formatted HTML.
// Converts BBCode text with BBCode::convert() and, unless the text opts out,
// runs the result through Smilies::replace().
// NOTE(review): the statement that returns the result is not among the lines
// visible in this listing — confirm what is done with $s before it is returned.
409 function prepare_text($text) {
// A "[nosmile]" marker anywhere in the text (matched case-insensitively)
// selects the conversion without smilie replacement.
410 if (stristr($text, '[nosmile]')) {
411 $s = BBCode::convert($text);
// Otherwise the converted text is additionally passed through Smilies::replace().
413 $s = Smilies::replace(BBCode::convert($text));
420 * return array with details for categories and folders for an item
426 * [ // categories array
428 * 'name': 'category name',
429 * 'removeurl': 'url to remove this category',
430 * 'first': 'is the first in this array? true/false',
431 * 'last': 'is the last in this array? true/false',
437 * 'name': 'folder name',
438 * 'removeurl': 'url to remove this folder',
439 * 'first': 'is the first in this array? true/false',
440 * 'last': 'is the last in this array? true/false',
// Builds the category list and the folder list of an item from $item['file'].
// NOTE(review): this listing omits several lines of the function (among them
// the array openers and possibly further keys of each entry); the comments
// below describe only what the visible lines do.
446 function get_cats_and_terms($item)
// Categories are the parts of $item['file'] wrapped in angle brackets.
453 $cnt = preg_match_all('/<(.*?)>/', $item['file'], $matches, PREG_SET_ORDER);
455 foreach ($matches as $mtch) {
// The captured text is decoded by FileTag::decode() and then escaped by XML::escape().
457 'name' => XML::escape(FileTag::decode($mtch[1])),
// A removal link is only produced when local_user() equals the item's uid; otherwise it stays empty.
459 'removeurl' => ((local_user() == $item['uid'])?'filerm/' . $item['id'] . '?f=&cat=' . XML::escape(FileTag::decode($mtch[1])):""),
// Flag the final category entry.
467 if (count($categories)) {
468 $categories[count($categories) - 1]['last'] = true;
// Folders are only looked up when local_user() equals the item's uid.
471 if (local_user() == $item['uid']) {
// Folders are the parts of $item['file'] wrapped in square brackets.
474 $cnt = preg_match_all('/\[(.*?)\]/', $item['file'], $matches, PREG_SET_ORDER);
476 foreach ($matches as $mtch) {
478 'name' => XML::escape(FileTag::decode($mtch[1])),
// Same removal link as for categories, but with the "term" query parameter.
480 'removeurl' => ((local_user() == $item['uid']) ? 'filerm/' . $item['id'] . '?f=&term=' . XML::escape(FileTag::decode($mtch[1])) : ""),
// Flag the final folder entry.
489 if (count($folders)) {
490 $folders[count($folders) - 1]['last'] = true;
// Both lists are returned together: categories first, folders second.
493 return [$categories, $folders];
498 * get private link for item
500 * @return boolean|array False if the item has no plink, otherwise array('href'=>plink url, 'title'=>translated title)
// Assembles the link data (href, orig, title, ...) for an item.
// NOTE(review): $a is used below, but its assignment is not among the lines
// visible in this listing — confirm where it comes from. The branch taken when
// neither condition below holds is not visible either.
502 function get_plink($item) {
// With a non-empty nickname in $a->user the link points at the "display/<guid>" page.
505 if ($a->user['nickname'] != "") {
507 //'href' => "display/" . $a->user['nickname'] . "/" . $item['id'],
508 'href' => "display/" . $item['guid'],
509 'orig' => "display/" . $item['guid'],
510 'title' => L10n::t('View on separate page'),
511 'orig_title' => L10n::t('view on separate page'),
// If x() reports a plink for the item, href and title are replaced so that they
// lead to the source; 'orig' and 'orig_title' keep the values set above.
// NOTE(review): x() is defined elsewhere; presumably it tests that the key is set and non-empty — verify.
514 if (x($item, 'plink')) {
515 $ret["href"] = $a->removeBaseURL($item['plink']);
516 $ret["title"] = L10n::t('link to source');
// Without a nickname the item's plink is used as it is, but only when the item's 'private' value is not 1.
519 } elseif (x($item, 'plink') && ($item['private'] != 1)) {
521 'href' => $item['plink'],
522 'orig' => $item['plink'],
523 'title' => L10n::t('link to source'),
/**
 * Return number of bytes in size (K, M, G)
 *
 * The last character of $size_str selects the multiplier (case-insensitive):
 * K = 1024, M = 1024^2, G = 1024^3. The leading number is read as an integer.
 * A value without one of these suffixes is handed back unchanged.
 *
 * @param string $size_str Size such as "128M"
 * @return int|string Number of bytes, or the untouched input if it has no known suffix
 */
function return_bytes($size_str)
{
	$multipliers = [
		'K' => 1024,
		'M' => 1048576,
		'G' => 1073741824,
	];

	$suffix = strtoupper(substr($size_str, -1));

	if (isset($multipliers[$suffix])) {
		return (int)$size_str * $multipliers[$suffix];
	}

	return $size_str;
}
/**
 * Encode a string as URL-safe base64.
 *
 * This is standard base64 with "+" replaced by "-" and "/" replaced by "_".
 *
 * @param string $s Data to encode
 * @param boolean $strip_padding Remove the trailing "=" padding when true
 * @return string Encoded string
 */
function base64url_encode($s, $strip_padding = false)
{
	$encoded = strtr(base64_encode($s), '+/', '-_');

	return $strip_padding ? str_replace('=', '', $encoded) : $encoded;
}
// Decodes a URL-safe base64 string: "-" and "_" are mapped back to "+" and "/"
// before base64_decode() runs.
566 function base64url_decode($s) {
// The complete backtrace is logged so that the caller that passed the illegal
// input can be identified.
// NOTE(review): the condition guarding this log call is not visible in this listing.
569 Logger::log('base64url_decode: illegal input: ' . print_r(debug_backtrace(), true));
574 * // Placeholder for new rev of salmon which strips base64 padding.
575 * // PHP base64_decode handles the un-padded input without requiring this step
576 * // Uncomment if you find you need it.
579 * if (!strpos($s,'=')) {
// Padding is not restored first; according to the placeholder note above,
// base64_decode() copes with un-padded input.
589 return base64_decode(strtr($s,'-_','+/'));
/**
 * Turn generic [video] tags into provider specific tags.
 *
 * A [video] tag whose content mentions "youtube" or "youtu.be" becomes a
 * [youtube] tag, one that mentions "vimeo" becomes a [vimeo] tag. All other
 * [video] tags are left untouched.
 *
 * @param string $s BBCode text
 * @return string BBCode text with the translated tags
 */
function bb_translate_video($s)
{
	$matches = [];

	if (!preg_match_all("/\[video\](.*?)\[\/video\]/ism", $s, $matches, PREG_SET_ORDER)) {
		return $s;
	}

	foreach ($matches as $mtch) {
		$whole_tag = $mtch[0];
		$target = $mtch[1];

		if (stristr($target, 'youtube') || stristr($target, 'youtu.be')) {
			$replacement = '[youtube]' . $target . '[/youtube]';
		} elseif (stristr($target, 'vimeo')) {
			$replacement = '[vimeo]' . $target . '[/vimeo]';
		} else {
			// Unknown provider: keep the [video] tag.
			continue;
		}

		$s = str_replace($whole_tag, $replacement, $s);
	}

	return $s;
}
/**
 * Reduce an OpenID URL to a comparable form: the "http://" and "https://"
 * scheme prefixes are removed and slashes at both ends are trimmed.
 *
 * @param string $s OpenID URL
 * @return string Normalised OpenID
 */
function normalise_openid($s)
{
	$without_scheme = str_replace(['http://', 'https://'], ['', ''], $s);

	return trim($without_scheme, '/');
}
/**
 * Revert tag links back to plain tags.
 *
 * Each "#[url=...]label[/url]", "@[url=...]label[/url]" or
 * "![url=...]label[/url]" is collapsed to the tag character followed by a
 * plain text. For hashtags that text is the label. For "@" and "!" it is the
 * 'addr' found by Contact::getDetailsByURL() for the link, or the link itself
 * if no address is known.
 *
 * @param string $s BBCode text with tag links
 * @return string Text with plain tags
 */
function undo_post_tagging($s)
{
	$matches = [];
	$cnt = preg_match_all('/([!#@])\[url=(.*?)\](.*?)\[\/url\]/ism', $s, $matches, PREG_SET_ORDER);

	if (!$cnt) {
		return $s;
	}

	foreach ($matches as $mtch) {
		$tag_link = $mtch[0];
		$tag_char = $mtch[1];
		$url = $mtch[2];
		$label = $mtch[3];

		if (in_array($tag_char, ['!', '@'])) {
			$contact = Contact::getDetailsByURL($url);
			$label = empty($contact['addr']) ? $url : $contact['addr'];
		}

		$s = str_replace($tag_link, $tag_char . $label, $s);
	}

	return $s;
}
/**
 * Make a string safe for use as a sprintf() format: every "%" is doubled so
 * that it is printed literally instead of starting a conversion.
 *
 * @param string $s Text that may contain percent signs
 * @return string Text with all percent signs doubled
 */
function protect_sprintf($s)
{
	$percent = '%';
	$escaped_percent = '%%';

	return str_replace($percent, $escaped_percent, $s);
}
633 /// @TODO Rewrite this
// NOTE(review): $i and $y are assigned on lines that this listing does not
// show; the comments below cover only the visible checks.
634 function is_a_date_arg($s) {
// strpos(...) == 4 is a loose comparison: it requires the first "-" of $s at
// offset 4, i.e. after four leading characters (a "YYYY-" shaped prefix).
640 if ($i <= $y + 1 && strpos($s, '-') == 4) {
// Everything after that dash is read as the month number.
641 $m = intval(substr($s, 5));
// Only 1..12 counts as a month.
643 if ($m > 0 && $m <= 12) {
/**
 * Remove indentation from a text
 *
 * The same number of leading indentation characters is stripped from every
 * line. If $count is not given, it is taken from the first non-empty line:
 * its whole leading run of indentation characters sets the amount. Lines with
 * less indentation than $count are left unchanged.
 *
 * @param string $text Text to de-indent, lines separated by "\n"
 * @param string $chr Regular expression fragment matching one indentation character
 * @param int|null $count Number of indentation characters to remove, null to detect it
 * @return string De-indented text
 */
function deindent($text, $chr = "[\t ]", $count = NULL)
{
	$lines = explode("\n", $text);

	if (is_null($count)) {
		// Stays 0 when every line is empty, so nothing is read past the end of $lines.
		$count = 0;

		foreach ($lines as $line) {
			if (strlen($line) == 0) {
				continue;
			}

			$m = [];
			preg_match("|^" . $chr . "*|", $line, $m);
			$count = isset($m[0]) ? strlen($m[0]) : 0;
			break;
		}
	}

	// The pattern is the same for every line, so build it once.
	$pattern = "|^" . $chr . "{" . $count . "}|";

	foreach ($lines as $k => $line) {
		$lines[$k] = preg_replace($pattern, "", $line);
	}

	return implode("\n", $lines);
}
/**
 * Format a byte count as a human readable size.
 *
 * The value is scaled by powers of 1024 to the largest fitting unit out of
 * B, KB, MB, GB and TB and rounded to $precision decimals. Negative values are
 * treated as 0; values below one byte stay in the "B" unit.
 *
 * @param int|float $bytes Number of bytes
 * @param int $precision Number of decimals to round to
 * @return string Scaled number, a space and the unit, e.g. "1.5 KB"
 */
function formatBytes($bytes, $precision = 2)
{
	$units = ['B', 'KB', 'MB', 'GB', 'TB'];

	$bytes = max($bytes, 0);

	$pow = floor(($bytes ? log($bytes) : 0) / log(1024));

	// Keep the exponent inside the unit table: the logarithm is negative for
	// values below 1 and exceeds the table beyond TB. Cast because floor() returns a float.
	$pow = (int) max(0, min($pow, count($units) - 1));

	$bytes /= pow(1024, $pow);

	return round($bytes, $precision) . ' ' . $units[$pow];
}
688 * @brief translate and format the networkname of a contact
690 * @param string $network
691 * Networkname of the contact (e.g. dfrn, rss and so on)
696 function format_network_name($network, $url = 0) {
697 if ($network != "") {
699 $network_name = '<a href="'.$url.'">'.ContactSelector::networkToName($network, $url)."</a>";
701 $network_name = ContactSelector::networkToName($network);
704 return $network_name;