X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=src%2FModel%2FTag.php;h=002501aa45f91084a80015cd0315070d6ed8c87f;hb=624e4c192c7f837ac0587a50da6e1409081eb519;hp=f947b9f898429744b2f427e3dd1fa0fd4619499f;hpb=b81206549922d7509a5865cb08733ff75b4d7a98;p=friendica.git diff --git a/src/Model/Tag.php b/src/Model/Tag.php index f947b9f898..002501aa45 100644 --- a/src/Model/Tag.php +++ b/src/Model/Tag.php @@ -1,6 +1,6 @@ '#', self::MENTION => '@', - self::IMPLICIT_MENTION => '%', self::EXCLUSIVE_MENTION => '!', + self::IMPLICIT_MENTION => '%', ]; /** @@ -63,18 +77,18 @@ class Tag * @param integer $type * @param string $name * @param string $url - * @param boolean $probing + * @param integer $target */ - public static function store(int $uriid, int $type, string $name, string $url = '', $probing = true) + public static function store(int $uriid, int $type, string $name, string $url = '', int $target = null) { if ($type == self::HASHTAG) { - // Remove some common "garbarge" from tags - $name = trim($name, "\x00..\x20\xFF#!@,;.:'/?!^°$%".'"'); + // Trim Unicode non-word characters + $name = preg_replace('/(^\W+)|(\W+$)/us', '', $name); $tags = explode(self::TAG_CHARACTER[self::HASHTAG], $name); if (count($tags) > 1) { foreach ($tags as $tag) { - self::store($uriid, $type, $tag, $url, $probing); + self::store($uriid, $type, $tag, $url); } return; } @@ -87,7 +101,7 @@ class Tag $cid = 0; $tagid = 0; - if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])) { + if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::TO, self::CC, self::BTO, self::BCC])) { if (empty($url)) { // No mention without a contact url return; @@ -97,44 +111,32 @@ class Tag Logger::notice('Wrong scheme in url', ['url' => $url, 'callstack' => System::callstack(20)]); } - if (!$probing) { - $condition = ['nurl' => Strings::normaliseLink($url), 'uid' => 0, 'deleted' => false]; - $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (DBA::isResult($contact)) { - $cid = $contact['id']; - Logger::info('Got id for contact url', ['cid' => $cid, 'url' => $url]); - } + $cid = Contact::getIdForURL($url, 0, false); + Logger::debug('Got id for contact', ['cid' => $cid, 'url' => $url]); - if (empty($cid)) { - $ssl_url = str_replace('http://', 'https://', $url); - $condition = ['`alias` IN (?, ?, ?) AND `uid` = ? AND NOT `deleted`', $url, Strings::normaliseLink($url), $ssl_url, 0]; - $contact = DBA::selectFirst('contact', ['id'], $condition, ['order' => ['id']]); - if (DBA::isResult($contact)) { - $cid = $contact['id']; - Logger::info('Got id for contact alias', ['cid' => $cid, 'url' => $url]); + if (empty($cid)) { + $tag = DBA::selectFirst('tag', ['name', 'type'], ['url' => $url]); + if (!empty($tag)) { + if ($tag['name'] != substr($name, 0, 96)) { + DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]); + } + if (!empty($target) && ($tag['type'] != $target)) { + DBA::update('tag', ['type' => $target], ['url' => $url]); } } - } else { - $cid = Contact::getIdForURL($url, 0, false); - Logger::info('Got id by probing', ['cid' => $cid, 'url' => $url]); - } - - if (empty($cid)) { - // The contact wasn't found in the system (most likely some dead account) - // We ensure that we only store a single entry by overwriting the previous name - Logger::info('Contact not found, updating tag', ['url' => $url, 'name' => $name]); - DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]); } } if (empty($cid)) { - if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) { - $url = strtolower($url); - } else { - $url = ''; + if (!in_array($type, [self::TO, self::CC, self::BTO, self::BCC])) { + if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) { + $url = strtolower($url); + } else { + $url = ''; + } } - $tagid = self::getID($name, $url); + $tagid = self::getID($name, $url, $target); if (empty($tagid)) { return; } @@ -151,28 +153,90 @@ class Tag } } - DBA::insert('post-tag', $fields, true); + DBA::insert('post-tag', $fields, Database::INSERT_IGNORE); Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'contact-id' => $cid, 'name' => $name, 'type' => $type, 'callstack' => System::callstack(8)]); } + /** + * Fetch the target type for the given url + * + * @param string $url + * @param bool $fetch Fetch information via network operations + * @return null|int + */ + public static function getTargetType(string $url, bool $fetch = true) + { + $target = null; + + if (empty($url)) { + return $target; + } + + $tag = DBA::selectFirst('tag', ['url', 'type'], ['url' => $url]); + if (!empty($tag['type'])) { + $target = $tag['type']; + if ($target != self::GENERAL_COLLECTION) { + Logger::debug('Found existing type', ['type' => $tag['type'], 'url' => $url]); + return $target; + } + } + + if ($url == ActivityPub::PUBLIC_COLLECTION) { + $target = self::PUBLIC_COLLECTION; + Logger::debug('Public collection', ['url' => $url]); + } else { + if (DBA::exists('apcontact', ['followers' => $url])) { + $target = self::FOLLOWER_COLLECTION; + Logger::debug('Found collection via existing apcontact', ['url' => $url]); + } elseif (Contact::getIdForURL($url, 0, $fetch ? null : false)) { + $target = self::ACCOUNT; + Logger::debug('URL is an account', ['url' => $url]); + } elseif ($fetch && ($target != self::GENERAL_COLLECTION)) { + $content = ActivityPub::fetchContent($url); + if (!empty($content['type']) && ($content['type'] == 'OrderedCollection')) { + $target = self::GENERAL_COLLECTION; + Logger::debug('URL is an ordered collection', ['url' => $url]); + } + } + } + + if (!empty($target) && !empty($tag['url']) && ($tag['type'] != $target)) { + DBA::update('tag', ['type' => $target], ['url' => $url]); + } + + if (empty($target)) { + Logger::debug('No type could be detected', ['url' => $url]); + } + + return $target; + } + /** * Get a tag id for a given tag name and url * * @param string $name * @param string $url + * @param int $type * @return void */ - public static function getID(string $name, string $url = '') + public static function getID(string $name, string $url = '', int $type = null) { $fields = ['name' => substr($name, 0, 96), 'url' => $url]; - $tag = DBA::selectFirst('tag', ['id'], $fields); + $tag = DBA::selectFirst('tag', ['id', 'type'], $fields); if (DBA::isResult($tag)) { + if (empty($tag['type']) && !empty($type)) { + DBA::update('tag', ['type' => $type], $fields); + } return $tag['id']; } - DBA::insert('tag', $fields, true); + if (!empty($type)) { + $fields['type'] = $type; + } + + DBA::insert('tag', $fields, Database::INSERT_IGNORE); $tid = DBA::lastInsertId(); if (!empty($tid)) { return $tid; @@ -201,9 +265,30 @@ class Tag self::store($uriid, $type, $name, $url, $probing); } + /** + * Get tags and mentions from the body + * + * @param string $body Body of the post + * @param string $tags Accepted tags + * + * @return array Tag list + */ + public static function getFromBody(string $body, string $tags = null) + { + if (is_null($tags)) { + $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; + } + + if (!preg_match_all("/([" . $tags . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) { + return []; + } + + return $result; + } + /** * Store tags and mentions from the body - * + * * @param integer $uriid URI-Id * @param string $body Body of the post * @param string $tags Accepted tags @@ -211,28 +296,38 @@ class Tag */ public static function storeFromBody(int $uriid, string $body, string $tags = null, $probing = true) { - if (is_null($tags)) { - $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; - } - Logger::info('Check for tags', ['uri-id' => $uriid, 'hash' => $tags, 'callstack' => System::callstack()]); - if (!preg_match_all("/([" . $tags . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) { - return; + if (is_null($tags)) { + $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; } - Logger::info('Found tags', ['uri-id' => $uriid, 'hash' => $tags, 'result' => $result]); + // Only remove the shared data from "real" reshares + $shared = BBCode::fetchShareAttributes($body); + if (!empty($shared['guid'])) { + if (preg_match("/\s*\[share .*?\](.*?)\[\/share\]\s*/ism", $body, $matches)) { + $share_body = $matches[1]; + } + $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body); + } - foreach ($result as $tag) { + foreach (self::getFromBody($body, $tags) as $tag) { self::storeByHash($uriid, $tag[1], $tag[3], $tag[2], $probing); } + + // Search for hashtags in the shared body (but only if hashtags are wanted) + if (!empty($share_body) && (strpos($tags, self::TAG_CHARACTER[self::HASHTAG]) !== false)) { + foreach (self::getFromBody($share_body, self::TAG_CHARACTER[self::HASHTAG]) as $tag) { + self::storeByHash($uriid, $tag[1], $tag[3], $tag[2], $probing); + } + } } /** * Store raw tags (not encapsulated in links) from the body * This function is needed in the intermediate phase. * Later we can call item::setHashtags in advance to have all tags converted. - * + * * @param integer $uriid URI-Id * @param string $body Body of the post */ @@ -263,7 +358,7 @@ class Tag */ public static function existsForPost(int $uriid) { - return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]]); + return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); } /** @@ -338,14 +433,14 @@ class Tag public static function createImplicitMentions(int $uri_id, int $parent_uri_id) { // Always mention the direct parent author - $parent = Item::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]); + $parent = Post::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]); self::store($uri_id, self::IMPLICIT_MENTION, $parent['author-name'], $parent['author-link']); if (DI::config()->get('system', 'disable_implicit_mentions')) { return; } - $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id]); + $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id, 'type' => [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($tags)) { self::store($uri_id, self::IMPLICIT_MENTION, $tag['name'], $tag['url']); } @@ -360,10 +455,10 @@ class Tag * @return array * @throws \Exception */ - public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]) + public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $condition = ['uri-id' => $uri_id, 'type' => $type]; - return DBA::selectToArray('tag-view', ['type', 'name', 'url'], $condition); + return DBA::selectToArray('tag-view', ['type', 'name', 'url', 'tag-type'], $condition); } /** @@ -374,7 +469,7 @@ class Tag * @return string tags and mentions * @throws \Exception */ - public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION, self::EXCLUSIVE_MENTION]) + public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $tag_list = []; $tags = self::getByURIId($uri_id, $type); @@ -405,7 +500,7 @@ class Tag $searchpath = DI::baseUrl() . "/search?tag="; - $taglist = DBA::select('tag-view', ['type', 'name', 'url'], + $taglist = DBA::select('tag-view', ['type', 'name', 'url', 'cid'], ['uri-id' => $item['uri-id'], 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($taglist)) { if ($tag['url'] == '') { @@ -421,14 +516,18 @@ class Tag $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']); } - $return['hashtags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; - $return['tags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['hashtags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::MENTION: case self::EXCLUSIVE_MENTION: + if (!empty($tag['cid'])) { + $tag['url'] = Contact::magicLinkById($tag['cid']); + } else { $tag['url'] = Contact::magicLink($tag['url']); - $return['mentions'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; - $return['tags'][] = $prefix . '' . htmlspecialchars($tag['name']) . ''; + } + $return['mentions'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; + $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::IMPLICIT_MENTION: $return['implicit_mentions'][] = $prefix . $tag['name']; @@ -449,10 +548,11 @@ class Tag */ public static function countByTag(string $search, int $uid = 0) { - $condition = ["`name` = ? AND (NOT `private` OR (`private` AND `uid` = ?))", $search, $uid]; - $params = ['group_by' => ['uri-id']]; + $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) + AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", + $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; - return DBA::count('tag-search-view', $condition, $params); + return DBA::count('tag-search-view', $condition); } /** @@ -467,7 +567,9 @@ class Tag */ public static function getURIIdListByTag(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0) { - $condition = ["`name` = ? AND (NOT `private` OR (`private` AND `uid` = ?))", $search, $uid]; + $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) + AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", + $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; if (!empty($last_uriid)) { $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]); @@ -475,7 +577,6 @@ class Tag $params = [ 'order' => ['uri-id' => true], - 'group_by' => ['uri-id'], 'limit' => [$start, $limit] ]; @@ -508,6 +609,23 @@ class Tag } } + /** + * Fetch the blocked tags as SQL + * + * @return string + */ + private static function getBlockedSQL() + { + $blocked_txt = DI::config()->get('system', 'blocked_tags'); + if (empty($blocked_txt)) { + return ''; + } + + $blocked = explode(',', $blocked_txt); + array_walk($blocked, function(&$value) { $value = "'" . DBA::escape(trim($value)) . "'";}); + return " AND NOT `name` IN (" . implode(',', $blocked) . ")"; + } + /** * Creates a list of the most frequent global hashtags over the given period * @@ -516,13 +634,23 @@ class Tag * @return array * @throws \Exception */ - public static function setGlobalTrendingHashtags(int $period, $limit = 10) + public static function setGlobalTrendingHashtags(int $period, int $limit = 10) { - $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score` + // Get a uri-id that is at least X hours old. + // We use the uri-id in the query for the hash tags since this is much faster + $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], + ['order' => ['received' => true]]); + if (empty($post['uri-id'])) { + return []; + } + + $block_sql = self::getBlockedSQL(); + + $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` FROM `tag-search-view` - WHERE `private` = ? AND `uid` = ? AND `received` > DATE_SUB(NOW(), INTERVAL ? HOUR) - GROUP BY `term` ORDER BY `score` DESC LIMIT ?", - Item::PUBLIC, 0, $period, $limit); + WHERE `private` = ? AND `uid` = ? AND `uri-id` > ? $block_sql + GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", + Item::PUBLIC, 0, $post['uri-id'], $limit); if (DBA::isResult($tagsStmt)) { $tags = DBA::toArray($tagsStmt); @@ -559,13 +687,23 @@ class Tag * @return array * @throws \Exception */ - public static function setLocalTrendingHashtags(int $period, $limit = 10) + public static function setLocalTrendingHashtags(int $period, int $limit = 10) { - $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score` + // Get a uri-id that is at least X hours old. + // We use the uri-id in the query for the hash tags since this is much faster + $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], + ['order' => ['received' => true]]); + if (empty($post['uri-id'])) { + return []; + } + + $block_sql = self::getBlockedSQL(); + + $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` FROM `tag-search-view` - WHERE `private` = ? AND `wall` AND `origin` AND `received` > DATE_SUB(NOW(), INTERVAL ? HOUR) - GROUP BY `term` ORDER BY `score` DESC LIMIT ?", - Item::PUBLIC, $period, $limit); + WHERE `private` = ? AND `wall` AND `origin` AND `uri-id` > ? $block_sql + GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", + Item::PUBLIC, $post['uri-id'], $limit); if (DBA::isResult($tagsStmt)) { $tags = DBA::toArray($tagsStmt);