3 * @copyright Copyright (C) 2010-2022, the Friendica project
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 namespace Friendica\Model;
24 use Friendica\Content\Text\BBCode;
25 use Friendica\Core\Cache\Enum\Duration;
26 use Friendica\Core\Logger;
27 use Friendica\Core\Protocol;
28 use Friendica\Core\System;
29 use Friendica\Database\Database;
30 use Friendica\Database\DBA;
32 use Friendica\Protocol\ActivityPub;
33 use Friendica\Util\DateTimeFormat;
34 use Friendica\Util\Strings;
39 * This Model class handles tag table interactions.
 * This table stores relevant tags related to posts, like hashtags and mentions.
48 * An implicit mention is a mention in a comment body that is redundant with the threading information.
50 const IMPLICIT_MENTION = 8;
52 * An exclusive mention transmits the post only to the target account without transmitting it to the followers, usually a forum.
54 const EXCLUSIVE_MENTION = 9;
62 const GENERAL_COLLECTION = 2;
63 const FOLLOWER_COLLECTION = 3;
64 const PUBLIC_COLLECTION = 4;
66 const TAG_CHARACTER = [
69 self::EXCLUSIVE_MENTION => '!',
70 self::IMPLICIT_MENTION => '%',
74 * Store tag/mention elements
76 * @param integer $uriid URI id
77 * @param integer $type Tag type
78 * @param string $name Tag name
79 * @param string $url Contact URL (optional)
80 * @param integer $target Target (default: null)
83 public static function store(int $uriid, int $type, string $name, string $url = '', int $target = null)
85 if ($type == self::HASHTAG) {
86 // Trim Unicode non-word characters
87 $name = preg_replace('/(^\W+)|(\W+$)/us', '', $name);
89 $tags = explode(self::TAG_CHARACTER[self::HASHTAG], $name);
90 if (count($tags) > 1) {
91 foreach ($tags as $tag) {
92 self::store($uriid, $type, $tag, $url);
105 if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::TO, self::CC, self::BTO, self::BCC])) {
107 // No mention without a contact url
111 if ((substr($url, 0, 7) == 'https//') || (substr($url, 0, 6) == 'http//')) {
112 Logger::notice('Wrong scheme in url', ['url' => $url, 'callstack' => System::callstack(20)]);
115 $cid = Contact::getIdForURL($url, 0, false);
116 Logger::debug('Got id for contact', ['cid' => $cid, 'url' => $url]);
119 $tag = DBA::selectFirst('tag', ['name', 'type'], ['url' => $url]);
121 if ($tag['name'] != substr($name, 0, 96)) {
122 DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]);
124 if (!empty($target) && ($tag['type'] != $target)) {
125 DBA::update('tag', ['type' => $target], ['url' => $url]);
132 if (!in_array($type, [self::TO, self::CC, self::BTO, self::BCC])) {
133 if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) {
134 $url = strtolower($url);
140 $tagid = self::getID($name, $url, $target);
146 $fields = ['uri-id' => $uriid, 'type' => $type, 'tid' => $tagid, 'cid' => $cid];
148 if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])) {
149 $condition = $fields;
150 $condition['type'] = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION];
151 if (DBA::exists('post-tag', $condition)) {
152 Logger::info('Tag already exists', $fields);
157 DBA::insert('post-tag', $fields, Database::INSERT_IGNORE);
159 Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'contact-id' => $cid, 'name' => $name, 'type' => $type, 'callstack' => System::callstack(8)]);
163 * Fetch the target type for the given url
166 * @param bool $fetch Fetch information via network operations
169 public static function getTargetType(string $url, bool $fetch = true)
177 $tag = DBA::selectFirst('tag', ['url', 'type'], ['url' => $url]);
178 if (!empty($tag['type'])) {
179 $target = $tag['type'];
180 if ($target != self::GENERAL_COLLECTION) {
181 Logger::debug('Found existing type', ['type' => $tag['type'], 'url' => $url]);
186 if ($url == ActivityPub::PUBLIC_COLLECTION) {
187 $target = self::PUBLIC_COLLECTION;
188 Logger::debug('Public collection', ['url' => $url]);
190 if (DBA::exists('apcontact', ['followers' => $url])) {
191 $target = self::FOLLOWER_COLLECTION;
192 Logger::debug('Found collection via existing apcontact', ['url' => $url]);
193 } elseif (Contact::getIdForURL($url, 0, $fetch ? null : false)) {
194 $target = self::ACCOUNT;
195 Logger::debug('URL is an account', ['url' => $url]);
196 } elseif ($fetch && ($target != self::GENERAL_COLLECTION)) {
197 $content = ActivityPub::fetchContent($url);
198 if (!empty($content['type']) && ($content['type'] == 'OrderedCollection')) {
199 $target = self::GENERAL_COLLECTION;
200 Logger::debug('URL is an ordered collection', ['url' => $url]);
205 if (!empty($target) && !empty($tag['url']) && ($tag['type'] != $target)) {
206 DBA::update('tag', ['type' => $target], ['url' => $url]);
209 if (empty($target)) {
210 Logger::debug('No type could be detected', ['url' => $url]);
217 * Get a tag id for a given tag name and url
219 * @param string $name
224 public static function getID(string $name, string $url = '', int $type = null)
226 $fields = ['name' => substr($name, 0, 96), 'url' => $url];
228 $tag = DBA::selectFirst('tag', ['id', 'type'], $fields);
229 if (DBA::isResult($tag)) {
230 if (empty($tag['type']) && !empty($type)) {
231 DBA::update('tag', ['type' => $type], $fields);
237 $fields['type'] = $type;
240 DBA::insert('tag', $fields, Database::INSERT_IGNORE);
241 $tid = DBA::lastInsertId();
246 Logger::error('No tag id created', $fields);
/**
 * Store a tag or mention whose type is identified by its tag character
 *
 * The character is translated into a tag type via getTypeForHash().
 * Nothing is stored when the character is not a known tag character.
 *
 * @param integer $uriid   URI id
 * @param string  $hash    Tag character that identifies the tag type
 * @param string  $name    Name
 * @param string  $url     URL
 * @param boolean $probing Whether probing is active. Only kept for backward
 *                         compatibility: store() has no probing parameter,
 *                         so the flag is not forwarded.
 * @return void
 */
public static function storeByHash(int $uriid, string $hash, string $name, string $url = '', bool $probing = true)
{
    $type = self::getTypeForHash($hash);
    if ($type == self::UNKNOWN) {
        return;
    }

    // The fifth parameter of store() is the integer target type of the tag,
    // not a probing switch. Forwarding the boolean made PHP coerce "true"
    // to 1 (or raise a TypeError under strict_types), and store() writes a
    // non-empty target into the "type" column of the tag table.
    self::store($uriid, $type, $name, $url);
}
/**
 * Get tags and mentions from the body
 *
 * Only tags that are written as a link are found, i.e. an accepted tag
 * character directly followed by "[url=...]...[/url]".
 *
 * @param string $body Body of the post
 * @param string $tags Accepted tag characters (default: hashtag, mention and exclusive mention)
 *
 * @return array Tag list. Every entry is one preg_match_all() set:
 *               [0] whole match, [1] tag character, [2] URL, [3] name
 */
public static function getFromBody(string $body, string $tags = null)
{
    if (is_null($tags)) {
        $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION];
    }

    // An empty set of accepted characters cannot match anything and would
    // only produce a malformed character class.
    if ($tags === '') {
        return [];
    }

    // The characters end up inside a character class. Escape everything that
    // has a meaning there ("^", "-", "]", "\") as well as the delimiter.
    $characters = preg_quote($tags, '/');

    if (!preg_match_all("/([" . $characters . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) {
        return [];
    }

    return $result;
}
/**
 * Store tags and mentions from the body
 *
 * When the body is a reshare that carries a "guid" attribute, the shared
 * content is cut out of the body before the tags are collected. Hashtags
 * inside the shared content are stored nevertheless.
 *
 * @param integer $uriid   URI-Id
 * @param string  $body    Body of the post
 * @param string  $tags    Accepted tags
 * @param boolean $probing Passed on to storeByHash()
 * @return void
 */
public static function storeFromBody(int $uriid, string $body, string $tags = null, bool $probing = true)
{
    Logger::info('Check for tags', ['uri-id' => $uriid, 'hash' => $tags, 'callstack' => System::callstack()]);

    $hashtag_character = self::TAG_CHARACTER[self::HASHTAG];

    if (is_null($tags)) {
        $tags = $hashtag_character . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION];
    }

    // Only remove the shared data from "real" reshares
    $share_body       = '';
    $share_attributes = BBCode::fetchShareAttributes($body);
    if (!empty($share_attributes['guid'])) {
        if (preg_match("/\s*\[share .*?\](.*?)\[\/share\]\s*/ism", $body, $share_match)) {
            $share_body = $share_match[1];
        }

        $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);
    }

    foreach (self::getFromBody($body, $tags) as $found) {
        self::storeByHash($uriid, $found[1], $found[3], $found[2], $probing);
    }

    // The shared body is only searched for hashtags, and only if hashtags are wanted
    if (empty($share_body) || (strpos($tags, $hashtag_character) === false)) {
        return;
    }

    foreach (self::getFromBody($share_body, $hashtag_character) as $found) {
        self::storeByHash($uriid, $found[1], $found[3], $found[2], $probing);
    }
}
/**
 * Store raw tags (not encapsulated in links) from the body
 * This function is needed in the intermediate phase.
 * Later we can call item::setHashtags in advance to have all tags converted.
 *
 * Only entries that start with the hashtag character are stored.
 *
 * @param integer $uriid URI-Id
 * @param string  $body  Body of the post
 * @return void
 */
public static function storeRawTagsFromBody(int $uriid, string $body)
{
    Logger::info('Check for tags', ['uri-id' => $uriid, 'callstack' => System::callstack()]);

    $raw_tags = BBCode::getTags($body);
    if (empty($raw_tags)) {
        return;
    }

    Logger::info('Found tags', ['uri-id' => $uriid, 'result' => $raw_tags]);

    $hashtag_character = self::TAG_CHARACTER[self::HASHTAG];

    foreach ($raw_tags as $raw_tag) {
        $character = substr($raw_tag, 0, 1);
        if ($character == $hashtag_character) {
            self::storeByHash($uriid, $character, substr($raw_tag, 1));
        }
    }
}
/**
 * Checks for stored hashtags and mentions for the given post
 *
 * @param integer $uriid
 * @return bool Result of the existence check on the "post-tag" table
 */
public static function existsForPost(int $uriid)
{
    $types = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION];

    return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => $types]);
}
/**
 * Remove a tag/mention from a post
 *
 * The matching row is looked up in "tag-view" by post, type and URL; for
 * hashtags the name has to match as well. Nothing happens when no row is found.
 *
 * @param integer $uriid URI id
 * @param integer $type  Type
 * @param string  $name  Name
 * @param string  $url   URL
 * @return void
 */
public static function remove(int $uriid, int $type, string $name, string $url = '')
{
    $lookup = ['uri-id' => $uriid, 'type' => $type, 'url' => $url];
    if ($type == self::HASHTAG) {
        $lookup['name'] = $name;
    }

    $link = DBA::selectFirst('tag-view', ['tid', 'cid'], $lookup);
    if (DBA::isResult($link)) {
        Logger::info('Removing tag/mention', ['uri-id' => $uriid, 'tid' => $link['tid'], 'name' => $name, 'url' => $url, 'callstack' => System::callstack(8)]);
        DBA::delete('post-tag', ['uri-id' => $uriid, 'type' => $type, 'tid' => $link['tid'], 'cid' => $link['cid']]);
    }
}
/**
 * Remove a tag/mention whose type is identified by its tag character
 *
 * Nothing is removed when the character is not a known tag character.
 *
 * @param integer $uriid
 * @param string  $hash  Tag character that identifies the tag type
 * @param string  $name
 * @param string  $url
 * @return void
 */
public static function removeByHash(int $uriid, string $hash, string $name, string $url = '')
{
    $type = self::getTypeForHash($hash);

    if ($type != self::UNKNOWN) {
        self::remove($uriid, $type, $name, $url);
    }
}
/**
 * Get the type for the given hash
 *
 * @param string $hash Tag character
 * @return integer type, self::UNKNOWN when the character belongs to no tag type
 */
private static function getTypeForHash(string $hash)
{
    $candidates = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::HASHTAG];

    foreach ($candidates as $candidate) {
        if ($hash == self::TAG_CHARACTER[$candidate]) {
            return $candidate;
        }
    }

    return self::UNKNOWN;
}
/**
 * Create implicit mentions for a given post
 *
 * The author of the parent post is always stored as an implicit mention.
 * Unless the config value system/disable_implicit_mentions is set, every
 * mention (explicit, exclusive or implicit) of the parent post is stored
 * as an implicit mention as well.
 *
 * @param integer $uri_id        Post that receives the implicit mentions
 * @param integer $parent_uri_id Parent post the mentions are taken from
 * @return void
 */
public static function createImplicitMentions(int $uri_id, int $parent_uri_id)
{
    // Always mention the direct parent author
    $parent = Post::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]);

    // Without this guard a missing parent post would hand null values to the
    // string parameters of store() and abort with a TypeError.
    if (!empty($parent)) {
        self::store($uri_id, self::IMPLICIT_MENTION, $parent['author-name'], $parent['author-link']);
    }

    if (DI::config()->get('system', 'disable_implicit_mentions')) {
        return;
    }

    $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id, 'type' => [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]);
    while ($tag = DBA::fetch($tags)) {
        self::store($uri_id, self::IMPLICIT_MENTION, $tag['name'], $tag['url']);
    }
    // Release the statement like the other select loops of this class do
    DBA::close($tags);
}
/**
 * Retrieves the tags of the provided type(s) that are associated with the provided post.
 *
 * @param int   $uri_id URI id of the post
 * @param array $type   Tag types to fetch (default: hashtags and all kinds of mentions)
 * @return array Rows of "tag-view" with the fields type, name, url and tag-type
 */
public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])
{
    $fields = ['type', 'name', 'url', 'tag-type'];

    return DBA::selectToArray('tag-view', $fields, ['uri-id' => $uri_id, 'type' => $type]);
}
/**
 * Return a string with all tags and mentions
 *
 * Every tag is written as its tag character followed by "[url=<url>]<name>[/url]".
 *
 * @param integer $uri_id
 * @param array   $type   Tag types to include
 * @return string tags and mentions, separated by commas
 */
public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])
{
    $formatted = [];

    foreach (self::getByURIId($uri_id, $type) as $row) {
        $formatted[] = sprintf('%s[url=%s]%s[/url]', self::TAG_CHARACTER[$row['type']], $row['url'], $row['name']);
    }

    return implode(',', $formatted);
}
/**
 * Sorts an item's tags into mentions, hashtags and other tags and renders them as HTML links.
 * Mentions get a personalized (magic) link; tags without a URL link to the local tag search.
 *
 * @param array $item Item; its "uri-id" selects the tags
 * @return array with the keys "tags", "hashtags" and "mentions" (HTML links) and
 *               "implicit_mentions" (tag character followed by the plain name)
 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 * @throws \ImagickException
 */
public static function populateFromItem(&$item)
{
    $return = [
        'tags'              => [],
        'hashtags'          => [],
        'mentions'          => [],
        'implicit_mentions' => [],
    ];

    // Tag URLs may originate from remote posts. They are escaped before they
    // are placed into the href attribute, just like the name already was.
    $render_link = static function ($prefix, $url, $name) {
        return '<bdi>' . $prefix . '<a href="' . htmlspecialchars($url, ENT_QUOTES) . '" target="_blank" rel="noopener noreferrer">' . htmlspecialchars($name) . '</a></bdi>';
    };

    $searchpath = DI::baseUrl() . "/search?tag=";

    $taglist = DBA::select('tag-view', ['type', 'name', 'url', 'cid'],
        ['uri-id' => $item['uri-id'], 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]);
    while ($tag = DBA::fetch($taglist)) {
        if ($tag['url'] == '') {
            $tag['url'] = $searchpath . rawurlencode($tag['name']);
        }

        $orig_tag = $tag['url'];

        $prefix = self::TAG_CHARACTER[$tag['type']];
        switch ($tag['type']) {
            case self::HASHTAG:
                // NOTE(review): $orig_tag was copied from $tag['url'] right above and
                // nothing visible changes either value in between, so this replacement
                // looks unreachable - confirm before removing it.
                if ($orig_tag != $tag['url']) {
                    $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']);
                }

                $link = $render_link($prefix, $tag['url'], $tag['name']);
                $return['hashtags'][] = $link;
                $return['tags'][]     = $link;
                break;
            case self::MENTION:
            case self::EXCLUSIVE_MENTION:
                if (!empty($tag['cid'])) {
                    $tag['url'] = Contact::magicLinkById($tag['cid']);
                } else {
                    $tag['url'] = Contact::magicLink($tag['url']);
                }

                $link = $render_link($prefix, $tag['url'], $tag['name']);
                $return['mentions'][] = $link;
                $return['tags'][]     = $link;
                break;
            case self::IMPLICIT_MENTION:
                $return['implicit_mentions'][] = $prefix . $tag['name'];
                break;
        }
    }
    DBA::close($taglist);

    return $return;
}
/**
 * Counts posts for given tag
 *
 * Counted are rows of "tag-search-view" with the given name that belong to
 * uid 0 or are non-global rows of the given user, restricted to the
 * ActivityPub, DFRN, Diaspora and OStatus networks unless the row belongs to
 * a non-zero given user.
 *
 * @param string  $search Tag name
 * @param integer $uid    User id
 * @return integer number of posts
 */
public static function countByTag(string $search, int $uid = 0)
{
    $sql = "`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`))
        AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))";

    $networks = [Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS];

    // Placeholder values in the order of their appearance in the SQL above
    $condition = array_merge([$sql, $search, 0, $uid], $networks, [$uid, 0]);

    return DBA::count('tag-search-view', $condition);
}
/**
 * Search posts for given tag
 *
 * Uses the same filter as countByTag() and returns the matching uri-ids,
 * ordered by uri-id and limited to the requested window.
 *
 * @param string  $search     Tag name
 * @param integer $uid        User id
 * @param integer $start      First value of the limit clause
 * @param integer $limit      Second value of the limit clause
 * @param integer $last_uriid When set, only uri-ids lower than this value are returned
 * @return array with URI-ID
 */
public static function getURIIdListByTag(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0)
{
    $sql = "`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`))
        AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))";

    $networks  = [Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS];
    $condition = array_merge([$sql, $search, 0, $uid], $networks, [$uid, 0]);

    if (!empty($last_uriid)) {
        $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]);
    }

    $params = [
        'order' => ['uri-id' => true],
        'limit' => [$start, $limit]
    ];

    $rows = DBA::select('tag-search-view', ['uri-id'], $condition, $params);

    $uriids = [];
    while ($row = DBA::fetch($rows)) {
        $uriids[] = $row['uri-id'];
    }
    DBA::close($rows);

    return $uriids;
}
/**
 * Returns a list of the most frequent global hashtags over the given period
 *
 * The list is taken from the cache; when the cache holds nothing for this
 * period and limit, it is created via setGlobalTrendingHashtags().
 *
 * @param int $period Period in hours to consider posts
 * @param int $limit  Number of returned tags
 * @return array
 */
public static function getGlobalTrendingHashtags(int $period, $limit = 10)
{
    $cached = DI::cache()->get('global_trending_tags-' . $period . '-' . $limit);

    return !empty($cached) ? $cached : self::setGlobalTrendingHashtags($period, $limit);
}
/**
 * Fetch the blocked tags as SQL
 *
 * The config value system/blocked_tags is read as a comma separated list.
 * Every entry is trimmed, escaped and quoted.
 *
 * @return string SQL fragment " AND NOT `name` IN (...)", empty when no tags are blocked
 */
private static function getBlockedSQL()
{
    $blocked_txt = DI::config()->get('system', 'blocked_tags');
    if (empty($blocked_txt)) {
        return '';
    }

    $quoted = array_map(function ($name) {
        return "'" . DBA::escape(trim($name)) . "'";
    }, explode(',', $blocked_txt));

    return " AND NOT `name` IN (" . implode(',', $quoted) . ")";
}
/**
 * Creates a list of the most frequent global hashtags over the given period
 *
 * Tags are ranked by the number of distinct authors, then by the number of
 * uses. A non-empty result is cached for one day.
 *
 * @param int $period Period in hours to consider posts
 * @param int $limit  Number of returned tags
 * @return array Rows with "term", "score" and "authors"; empty when nothing was found
 */
public static function setGlobalTrendingHashtags(int $period, int $limit = 10)
{
    // Get a uri-id that is at least X hours old.
    // We use the uri-id in the query for the hash tags since this is much faster
    $oldest = Post::selectFirstThread(
        ['uri-id'],
        ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')],
        ['order' => ['received' => true]]
    );
    if (empty($oldest['uri-id'])) {
        return [];
    }

    $block_sql = self::getBlockedSQL();

    $statement = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors`
        FROM `tag-search-view`
        WHERE `private` = ? AND `uid` = ? AND `uri-id` > ? $block_sql
        GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?",
        Item::PUBLIC, 0, $oldest['uri-id'], $limit);

    if (!DBA::isResult($statement)) {
        return [];
    }

    $trending = DBA::toArray($statement);
    DI::cache()->set('global_trending_tags-' . $period . '-' . $limit, $trending, Duration::DAY);

    return $trending;
}
/**
 * Returns a list of the most frequent local hashtags over the given period
 *
 * The list is taken from the cache; when the cache holds nothing for this
 * period and limit, it is created via setLocalTrendingHashtags().
 *
 * @param int $period Period in hours to consider posts
 * @param int $limit  Number of returned tags
 * @return array
 */
public static function getLocalTrendingHashtags(int $period, $limit = 10)
{
    $cached = DI::cache()->get('local_trending_tags-' . $period . '-' . $limit);

    return !empty($cached) ? $cached : self::setLocalTrendingHashtags($period, $limit);
}
/**
 * Creates a list of the most frequent local hashtags over the given period
 *
 * Only rows flagged as "wall" and "origin" are considered. Tags are ranked
 * by the number of distinct authors, then by the number of uses. A non-empty
 * result is cached for one day.
 *
 * @param int $period Period in hours to consider posts
 * @param int $limit  Number of returned tags
 * @return array Rows with "term", "score" and "authors"; empty when nothing was found
 */
public static function setLocalTrendingHashtags(int $period, int $limit = 10)
{
    // Get a uri-id that is at least X hours old.
    // We use the uri-id in the query for the hash tags since this is much faster
    $oldest = Post::selectFirstThread(
        ['uri-id'],
        ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')],
        ['order' => ['received' => true]]
    );
    if (empty($oldest['uri-id'])) {
        return [];
    }

    $block_sql = self::getBlockedSQL();

    $statement = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors`
        FROM `tag-search-view`
        WHERE `private` = ? AND `wall` AND `origin` AND `uri-id` > ? $block_sql
        GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?",
        Item::PUBLIC, $oldest['uri-id'], $limit);

    if (!DBA::isResult($statement)) {
        return [];
    }

    $trending = DBA::toArray($statement);
    DI::cache()->set('local_trending_tags-' . $period . '-' . $limit, $trending, Duration::DAY);

    return $trending;
}
/**
 * Check if the provided tag is of one of the provided term types.
 *
 * The tag characters of the given types are collected (types without a tag
 * character are ignored) and handed to Strings::startsWithChars() together
 * with the tag.
 *
 * @param string $tag
 * @param int    ...$types
 * @return bool Result of Strings::startsWithChars()
 */
public static function isType($tag, ...$types)
{
    $characters = [];

    foreach ($types as $type) {
        if (!array_key_exists($type, self::TAG_CHARACTER)) {
            continue;
        }

        $characters[] = self::TAG_CHARACTER[$type];
    }

    return Strings::startsWithChars($tag, $characters);
}
/**
 * Fetch user who subscribed to the given tag
 *
 * A subscription is a row of the "search" table whose term equals the tag.
 *
 * @param string $tag Search term
 * @return array User list
 */
private static function getUIDListByTag(string $tag)
{
    $subscribers = [];

    $statement = DBA::select('search', ['uid'], ['term' => $tag]);
    while ($row = DBA::fetch($statement)) {
        $subscribers[] = $row['uid'];
    }
    DBA::close($statement);

    return $subscribers;
}
/**
 * Fetch user who subscribed to the tags of the given item
 *
 * Every hashtag of the post is looked up, prefixed with the hashtag
 * character, via getUIDListByTag(). Duplicate user ids are removed.
 *
 * @param integer $uri_id
 * @return array User list
 */
public static function getUIDListByURIId(int $uri_id)
{
    $subscribers = [];

    foreach (self::getByURIId($uri_id, [self::HASHTAG]) as $hashtag) {
        $term = self::TAG_CHARACTER[self::HASHTAG] . $hashtag['name'];

        foreach (self::getUIDListByTag($term) as $uid) {
            $subscribers[] = $uid;
        }
    }

    return array_unique($subscribers);
}