X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FModel%2FTag.php;h=0ea19f55d9e11f5e8bc0f11585fd5f287e6480bf;hb=55db2670d1662b32c6faa820c60c44e16cffab20;hp=40f2f8d6a1600ffe0622ca81ec6d09fa0259c482;hpb=cc5e5be931bc21e44d0cb26778e8cc15be924e41;p=friendica.git diff --git a/src/Model/Tag.php b/src/Model/Tag.php index 40f2f8d6a1..0ea19f55d9 100644 --- a/src/Model/Tag.php +++ b/src/Model/Tag.php @@ -1,6 +1,6 @@ '#', self::MENTION => '@', - self::IMPLICIT_MENTION => '%', self::EXCLUSIVE_MENTION => '!', + self::IMPLICIT_MENTION => '%', ]; /** @@ -63,18 +77,18 @@ class Tag * @param integer $type * @param string $name * @param string $url - * @param boolean $probing + * @param integer $target */ - public static function store(int $uriid, int $type, string $name, string $url = '', $probing = true) + public static function store(int $uriid, int $type, string $name, string $url = '', int $target = null) { if ($type == self::HASHTAG) { - // Remove some common "garbarge" from tags - $name = trim($name, "\x00..\x20\xFF#!@,;.:'/?!^°$%".'"'); + // Trim Unicode non-word characters + $name = preg_replace('/(^\W+)|(\W+$)/us', '', $name); $tags = explode(self::TAG_CHARACTER[self::HASHTAG], $name); if (count($tags) > 1) { foreach ($tags as $tag) { - self::store($uriid, $type, $tag, $url, $probing); + self::store($uriid, $type, $tag, $url); } return; } @@ -87,7 +101,7 @@ class Tag $cid = 0; $tagid = 0; - if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])) { + if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::TO, self::CC, self::BTO, self::BCC])) { if (empty($url)) { // No mention without a contact url return; @@ -97,44 +111,32 @@ class Tag Logger::notice('Wrong scheme in url', ['url' => $url, 'callstack' => System::callstack(20)]); } - if (!$probing) { - $condition = ['nurl' => Strings::normaliseLink($url), 'uid' => 0, 'deleted' => false]; - $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (DBA::isResult($contact)) { - $cid = $contact['id']; - Logger::info('Got id for contact url', ['cid' => $cid, 'url' => $url]); - } + $cid = Contact::getIdForURL($url, 0, false); + Logger::debug('Got id for contact', ['cid' => $cid, 'url' => $url]); - if (empty($cid)) { - $ssl_url = str_replace('http://', 'https://', $url); - $condition = ['`alias` IN (?, ?, ?) AND `uid` = ? AND NOT `deleted`', $url, Strings::normaliseLink($url), $ssl_url, 0]; - $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (DBA::isResult($contact)) { - $cid = $contact['id']; - Logger::info('Got id for contact alias', ['cid' => $cid, 'url' => $url]); + if (empty($cid)) { + $tag = DBA::selectFirst('tag', ['name', 'type'], ['url' => $url]); + if (!empty($tag)) { + if ($tag['name'] != substr($name, 0, 96)) { + DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]); + } + if (!empty($target) && ($tag['type'] != $target)) { + DBA::update('tag', ['type' => $target], ['url' => $url]); } } - } else { - $cid = Contact::getIdForURL($url, 0, false); - Logger::info('Got id by probing', ['cid' => $cid, 'url' => $url]); - } - - if (empty($cid)) { - // The contact wasn't found in the system (most likely some dead account) - // We ensure that we only store a single entry by overwriting the previous name - Logger::info('Contact not found, updating tag', ['url' => $url, 'name' => $name]); - DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]); } } if (empty($cid)) { - if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) { - $url = strtolower($url); - } else { - $url = ''; + if (!in_array($type, [self::TO, self::CC, self::BTO, self::BCC])) { + if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) { + $url = strtolower($url); + } else { + $url = ''; + } } - $tagid = self::getID($name, $url); + $tagid = self::getID($name, $url, $target); if (empty($tagid)) { return; } @@ -151,28 +153,87 @@ class Tag } } - DBA::insert('post-tag', $fields, true); + DBA::insert('post-tag', $fields, Database::INSERT_IGNORE); Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'contact-id' => $cid, 'name' => $name, 'type' => $type, 'callstack' => System::callstack(8)]); } + /** + * Fetch the target type for the given url + * + * @param string $url + * @param bool $fetch Fetch information via network operations + * @return null|int + */ + public static function getTargetType(string $url, bool $fetch = true) + { + $target = null; + + if (empty($url)) { + return $target; + } + + $tag = DBA::selectFirst('tag', ['url', 'type'], ['url' => $url]); + if (!empty($tag['type'])) { + $target = $tag['type']; + if ($target != self::GENERAL_COLLECTION) { + Logger::debug('Found existing type', ['type' => $tag['type'], 'url' => $url]); + return $target; + } + } + + if ($url == ActivityPub::PUBLIC_COLLECTION) { + $target = self::PUBLIC_COLLECTION; + Logger::debug('Public collection', ['url' => $url]); + } else { + if (DBA::exists('apcontact', ['followers' => $url])) { + $target = self::FOLLOWER_COLLECTION; + Logger::debug('Found collection via existing apcontact', ['url' => $url]); + } elseif (Contact::getIdForURL($url, 0, $fetch ? null : false)) { + $target = self::ACCOUNT; + Logger::debug('URL is an account', ['url' => $url]); + } elseif ($fetch && ($target != self::GENERAL_COLLECTION)) { + $content = ActivityPub::fetchContent($url); + if (!empty($content['type']) && ($content['type'] == 'OrderedCollection')) { + $target = self::GENERAL_COLLECTION; + Logger::debug('URL is an ordered collection', ['url' => $url]); + } + } + } + + if (!empty($target) && !empty($tag['url']) && ($tag['type'] != $target)) { + DBA::update('tag', ['type' => $target], ['url' => $url]); + } + + if (empty($target)) { + Logger::debug('No type could be detected', ['url' => $url]); + } + + return $target; + } + /** * Get a tag id for a given tag name and url * * @param string $name * @param string $url + * @param int $type * @return void */ - public static function getID(string $name, string $url = '') + public static function getID(string $name, string $url = '', int $type = null) { $fields = ['name' => substr($name, 0, 96), 'url' => $url]; + if (!empty($type)) { + $fields['type'] = $type; + } + $tag = DBA::selectFirst('tag', ['id'], $fields); if (DBA::isResult($tag)) { return $tag['id']; } - DBA::insert('tag', $fields, true); + DBA::insert('tag', $fields, Database::INSERT_IGNORE); $tid = DBA::lastInsertId(); if (!empty($tid)) { return $tid; @@ -202,22 +263,40 @@ class Tag } /** - * Store tags and mentions from the body - * - * @param integer $uriid URI-Id + * Get tags and mentions from the body + * * @param string $body Body of the post * @param string $tags Accepted tags - * @param boolean $probing Perform a probing for contacts, adding them if needed + * + * @return array Tag list */ - public static function storeFromBody(int $uriid, string $body, string $tags = null, $probing = true) + public static function getFromBody(string $body, string $tags = null) { if (is_null($tags)) { $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; } + if (!preg_match_all("/([" . $tags . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) { + return []; + } + + return $result; + } + + /** + * Store tags and mentions from the body + * + * @param integer $uriid URI-Id + * @param string $body Body of the post + * @param string $tags Accepted tags + * @param boolean $probing Perform a probing for contacts, adding them if needed + */ + public static function storeFromBody(int $uriid, string $body, string $tags = null, $probing = true) + { Logger::info('Check for tags', ['uri-id' => $uriid, 'hash' => $tags, 'callstack' => System::callstack()]); - if (!preg_match_all("/([" . $tags . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) { + $result = self::getFromBody($body, $tags); + if (empty($result)) { return; } @@ -232,7 +311,7 @@ class Tag * Store raw tags (not encapsulated in links) from the body * This function is needed in the intermediate phase. * Later we can call item::setHashtags in advance to have all tags converted. - * + * * @param integer $uriid URI-Id * @param string $body Body of the post */ @@ -263,7 +342,7 @@ class Tag */ public static function existsForPost(int $uriid) { - return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]]); + return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); } /** @@ -338,14 +417,14 @@ class Tag public static function createImplicitMentions(int $uri_id, int $parent_uri_id) { // Always mention the direct parent author - $parent = Item::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]); + $parent = Post::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]); self::store($uri_id, self::IMPLICIT_MENTION, $parent['author-name'], $parent['author-link']); if (DI::config()->get('system', 'disable_implicit_mentions')) { return; } - $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id]); + $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id, 'type' => [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($tags)) { self::store($uri_id, self::IMPLICIT_MENTION, $tag['name'], $tag['url']); } @@ -360,10 +439,10 @@ class Tag * @return array * @throws \Exception */ - public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]) + public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $condition = ['uri-id' => $uri_id, 'type' => $type]; - return DBA::selectToArray('tag-view', ['type', 'name', 'url'], $condition); + return DBA::selectToArray('tag-view', ['type', 'name', 'url', 'tag-type'], $condition); } /** @@ -374,7 +453,7 @@ class Tag * @return string tags and mentions * @throws \Exception */ - public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]) + public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $tag_list = []; $tags = self::getByURIId($uri_id, $type); @@ -405,7 +484,7 @@ class Tag $searchpath = DI::baseUrl() . "/search?tag="; - $taglist = DBA::select('tag-view', ['type', 'name', 'url'], + $taglist = DBA::select('tag-view', ['type', 'name', 'url', 'cid'], ['uri-id' => $item['uri-id'], 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($taglist)) { if ($tag['url'] == '') { @@ -421,14 +500,18 @@ class Tag $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']); } - $return['hashtags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; - $return['tags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['hashtags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::MENTION: case self::EXCLUSIVE_MENTION: + if (!empty($tag['cid'])) { + $tag['url'] = Contact::magicLinkById($tag['cid']); + } else { $tag['url'] = Contact::magicLink($tag['url']); - $return['mentions'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; - $return['tags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; + } + $return['mentions'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::IMPLICIT_MENTION: $return['implicit_mentions'][] = $prefix . $tag['name']; @@ -449,10 +532,11 @@ class Tag */ public static function countByTag(string $search, int $uid = 0) { - $condition = ["`name` = ? AND (NOT `private` OR (`private` AND `uid` = ?))", $search, $uid]; - $params = ['group_by' => ['uri-id']]; + $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) + AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", + $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; - return DBA::count('tag-search-view', $condition, $params); + return DBA::count('tag-search-view', $condition); } /** @@ -462,14 +546,21 @@ class Tag * @param integer $uid * @param integer $start * @param integer $limit + * @param integer $last_uriid * @return array with URI-ID */ - public static function getURIIdListByTag(string $search, int $uid = 0, int $start = 0, int $limit = 100) + public static function getURIIdListByTag(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0) { - $condition = ["`name` = ? AND (NOT `private` OR (`private` AND `uid` = ?))", $search, $uid]; + $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) + AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", + $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; + + if (!empty($last_uriid)) { + $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]); + } + $params = [ 'order' => ['uri-id' => true], - 'group_by' => ['uri-id'], 'limit' => [$start, $limit] ]; @@ -488,54 +579,123 @@ class Tag * Returns a list of the most frequent global hashtags over the given period * * @param int $period Period in hours to consider posts + * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function getGlobalTrendingHashtags(int $period, $limit = 10) { - $tags = DI::cache()->get('global_trending_tags'); - - if (empty($tags)) { - $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score` - FROM `tag-search-view` - WHERE `private` = ? AND `received` > DATE_SUB(NOW(), INTERVAL ? HOUR) - GROUP BY `term` ORDER BY `score` DESC LIMIT ?", - Item::PUBLIC, $period, $limit); - - if (DBA::isResult($tagsStmt)) { - $tags = DBA::toArray($tagsStmt); - DI::cache()->set('global_trending_tags', $tags, Duration::HOUR); - } + $tags = DI::cache()->get('global_trending_tags-' . $period . '-' . $limit); + if (!empty($tags)) { + return $tags; + } else { + return self::setGlobalTrendingHashtags($period, $limit); + } + } + + /** + * Fetch the blocked tags as SQL + * + * @return string + */ + private static function getBlockedSQL() + { + $blocked_txt = DI::config()->get('system', 'blocked_tags'); + if (empty($blocked_txt)) { + return ''; } - return $tags ?: []; + $blocked = explode(',', $blocked_txt); + array_walk($blocked, function(&$value) { $value = "'" . DBA::escape(trim($value)) . "'";}); + return " AND NOT `name` IN (" . implode(',', $blocked) . ")"; + } + + /** + * Creates a list of the most frequent global hashtags over the given period + * + * @param int $period Period in hours to consider posts + * @param int $limit Number of returned tags + * @return array + * @throws \Exception + */ + public static function setGlobalTrendingHashtags(int $period, int $limit = 10) + { + // Get a uri-id that is at least X hours old. + // We use the uri-id in the query for the hash tags since this is much faster + $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], + ['order' => ['received' => true]]); + if (empty($post['uri-id'])) { + return []; + } + + $block_sql = self::getBlockedSQL(); + + $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` + FROM `tag-search-view` + WHERE `private` = ? AND `uid` = ? AND `uri-id` > ? $block_sql + GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", + Item::PUBLIC, 0, $post['uri-id'], $limit); + + if (DBA::isResult($tagsStmt)) { + $tags = DBA::toArray($tagsStmt); + DI::cache()->set('global_trending_tags-' . $period . '-' . $limit, $tags, Duration::DAY); + return $tags; + } + + return []; } /** * Returns a list of the most frequent local hashtags over the given period * * @param int $period Period in hours to consider posts + * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function getLocalTrendingHashtags(int $period, $limit = 10) { - $tags = DI::cache()->get('local_trending_tags'); - - if (empty($tags)) { - $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score` - FROM `tag-search-view` - WHERE `private` = ? AND `wall` AND `origin` AND `received` > DATE_SUB(NOW(), INTERVAL ? HOUR) - GROUP BY `term` ORDER BY `score` DESC LIMIT ?", - Item::PUBLIC, $period, $limit); - - if (DBA::isResult($tagsStmt)) { - $tags = DBA::toArray($tagsStmt); - DI::cache()->set('local_trending_tags', $tags, Duration::HOUR); - } + $tags = DI::cache()->get('local_trending_tags-' . $period . '-' . $limit); + if (!empty($tags)) { + return $tags; + } else { + return self::setLocalTrendingHashtags($period, $limit); + } + } + + /** + * Returns a list of the most frequent local hashtags over the given period + * + * @param int $period Period in hours to consider posts + * @param int $limit Number of returned tags + * @return array + * @throws \Exception + */ + public static function setLocalTrendingHashtags(int $period, int $limit = 10) + { + // Get a uri-id that is at least X hours old. + // We use the uri-id in the query for the hash tags since this is much faster + $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], + ['order' => ['received' => true]]); + if (empty($post['uri-id'])) { + return []; + } + + $block_sql = self::getBlockedSQL(); + + $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` + FROM `tag-search-view` + WHERE `private` = ? AND `wall` AND `origin` AND `uri-id` > ? $block_sql + GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", + Item::PUBLIC, $post['uri-id'], $limit); + + if (DBA::isResult($tagsStmt)) { + $tags = DBA::toArray($tagsStmt); + DI::cache()->set('local_trending_tags-' . $period . '-' . $limit, $tags, Duration::DAY); + return $tags; } - return $tags ?: []; + return []; } /**