]> git.mxchange.org Git - friendica.git/blob - src/Model/Term.php
Merge pull request #7487 from MrPetovan/task/7473-trending-tags
[friendica.git] / src / Model / Term.php
1 <?php
2 /**
3  * @file src/Model/Term.php
4  */
5 namespace Friendica\Model;
6
7 use Friendica\Core\Cache;
8 use Friendica\Core\Logger;
9 use Friendica\Core\System;
10 use Friendica\Database\DBA;
11 use Friendica\Util\Strings;
12
13 /**
14  * Class Term
15  *
16  * This Model class handles term table interactions.
17  * This table stores relevant terms related to posts, photos and searches, like hashtags, mentions and
18  * user-applied categories.
19  *
20  * @package Friendica\Model
21  */
22 class Term
23 {
    // Term types. These values are what this class reads from and writes to the `type` column of the `term` table.
    const UNKNOWN           = 0;
    // Hashtag term, prefixed with '#' (see TAG_CHARACTER).
    const HASHTAG           = 1;
    // Regular mention term, prefixed with '@' (see TAG_CHARACTER).
    const MENTION           = 2;
    // Category term; rendered as <term> by fileTextFromItemId().
    const CATEGORY          = 3;
    // NOTE(review): not referenced by the code visible in this file - presumably a personal category; confirm.
    const PCATEGORY         = 4;
    // File (folder) term; rendered as [term] by fileTextFromItemId().
    const FILE              = 5;
    // NOTE(review): not referenced by the code visible in this file - presumably a saved search; confirm.
    const SAVEDSEARCH       = 6;
    // NOTE(review): not referenced by the code visible in this file; confirm its meaning against the callers.
    const CONVERSATION      = 7;
    /**
     * An implicit mention is a mention in a comment body that is redundant with the threading information.
     */
    const IMPLICIT_MENTION  = 8;
    /**
     * An exclusive mention transfers the ownership of the post to the target account, usually a forum.
     */
    const EXCLUSIVE_MENTION = 9;

    // Prefix character used in BBCode/text for each term type that has one.
    // Types without an entry here (e.g. categories and files) have no prefix character.
    const TAG_CHARACTER = [
        self::HASHTAG           => '#',
        self::MENTION           => '@',
        self::IMPLICIT_MENTION  => '%',
        self::EXCLUSIVE_MENTION => '!',
    ];

    // Object types, used for the `otype` column of the `term` table.
    // Only OBJECT_TYPE_POST is used by the code visible in this file.
    const OBJECT_TYPE_POST  = 1;
    const OBJECT_TYPE_PHOTO = 2;
50
51         /**
52          * Returns a list of the most frequent global hashtags over the given period
53          *
54          * @param int $period Period in hours to consider posts
55          * @return array
56          * @throws \Exception
57          */
58         public static function getGlobalTrendingHashtags(int $period, $limit = 10)
59         {
60                 $tags = Cache::get('global_trending_tags');
61
62                 if (!$tags) {
63                         $tagsStmt = DBA::p("SELECT t.`term`, COUNT(*) AS `score`
64                                 FROM `term` t
65                                  JOIN `item` i ON i.`id` = t.`oid` AND i.`uid` = t.`uid`
66                                  JOIN `thread` ON `thread`.`iid` = i.`id`
67                                 WHERE `thread`.`visible`
68                                   AND NOT `thread`.`deleted`
69                                   AND NOT `thread`.`moderated`
70                                   AND NOT `thread`.`private`
71                                   AND t.`uid` = 0
72                                   AND t.`otype` = ?
73                                   AND t.`type` = ?
74                                   AND t.`term` != ''
75                                   AND i.`received` > DATE_SUB(NOW(), INTERVAL ? HOUR)
76                                 GROUP BY `term`
77                                 ORDER BY `score` DESC
78                                 LIMIT ?",
79                                 Term::OBJECT_TYPE_POST,
80                                 Term::HASHTAG,
81                                 $period,
82                                 $limit
83                         );
84
85                         if (DBA::isResult($tagsStmt)) {
86                                 $tags = DBA::toArray($tagsStmt);
87                                 Cache::set('global_trending_tags', $tags, Cache::HOUR);
88                         }
89                 }
90
91                 return $tags ?: [];
92         }
93
94         /**
95          * Returns a list of the most frequent local hashtags over the given period
96          *
97          * @param int $period Period in hours to consider posts
98          * @return array
99          * @throws \Exception
100          */
101         public static function getLocalTrendingHashtags(int $period, $limit = 10)
102         {
103                 $tags = Cache::get('local_trending_tags');
104
105                 if (!$tags) {
106                         $tagsStmt = DBA::p("SELECT t.`term`, COUNT(*) AS `score`
107                                 FROM `term` t
108                                 JOIN `item` i ON i.`id` = t.`oid` AND i.`uid` = t.`uid`
109                                 JOIN `thread` ON `thread`.`iid` = i.`id`
110                                 JOIN `user` ON `user`.`uid` = `thread`.`uid` AND NOT `user`.`hidewall`
111                                 WHERE `thread`.`visible`
112                                   AND NOT `thread`.`deleted`
113                                   AND NOT `thread`.`moderated`
114                                   AND NOT `thread`.`private`
115                                   AND `thread`.`wall`
116                                   AND `thread`.`origin`
117                                   AND t.`otype` = ?
118                                   AND t.`type` = ?
119                                   AND t.`term` != ''
120                                   AND i.`received` > DATE_SUB(NOW(), INTERVAL ? HOUR)
121                                 GROUP BY `term`
122                                 ORDER BY `score` DESC
123                                 LIMIT ?",
124                                 Term::OBJECT_TYPE_POST,
125                                 Term::HASHTAG,
126                                 $period,
127                                 $limit
128                         );
129
130                         if (DBA::isResult($tagsStmt)) {
131                                 $tags = DBA::toArray($tagsStmt);
132                                 Cache::set('local_trending_tags', $tags, Cache::HOUR);
133                         }
134                 }
135
136                 return $tags ?: [];
137         }
138
139         /**
140          * Generates the legacy item.tag field comma-separated BBCode string from an item ID.
141          * Includes only hashtags, implicit and explicit mentions.
142          *
143          * @param int $item_id
144          * @return string
145          * @throws \Exception
146          */
147         public static function tagTextFromItemId($item_id)
148         {
149                 $tag_list = [];
150                 $tags = self::tagArrayFromItemId($item_id, [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION]);
151                 foreach ($tags as $tag) {
152                         $tag_list[] = self::TAG_CHARACTER[$tag['type']] . '[url=' . $tag['url'] . ']' . $tag['term'] . '[/url]';
153                 }
154
155                 return implode(',', $tag_list);
156         }
157
158         /**
159          * Retrieves the terms from the provided type(s) associated with the provided item ID.
160          *
161          * @param int       $item_id
162          * @param int|array $type
163          * @return array
164          * @throws \Exception
165          */
166         public static function tagArrayFromItemId($item_id, $type = [self::HASHTAG, self::MENTION])
167         {
168                 $condition = ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => $type];
169                 $tags = DBA::select('term', ['type', 'term', 'url'], $condition);
170                 if (!DBA::isResult($tags)) {
171                         return [];
172                 }
173
174                 return DBA::toArray($tags);
175         }
176
177         /**
178          * Generates the legacy item.file field string from an item ID.
179          * Includes only file and category terms.
180          *
181          * @param int $item_id
182          * @return string
183          * @throws \Exception
184          */
185         public static function fileTextFromItemId($item_id)
186         {
187                 $file_text = '';
188                 $tags = self::tagArrayFromItemId($item_id, [self::FILE, self::CATEGORY]);
189                 foreach ($tags as $tag) {
190                         if ($tag['type'] == self::CATEGORY) {
191                                 $file_text .= '<' . $tag['term'] . '>';
192                         } else {
193                                 $file_text .= '[' . $tag['term'] . ']';
194                         }
195                 }
196
197                 return $file_text;
198         }
199
200         /**
201          * Inserts new terms for the provided item ID based on the legacy item.tag field BBCode content.
202          * Deletes all previous tag terms for the same item ID.
203          * Sets both the item.mention and thread.mentions field flags if a mention concerning the item UID is found.
204          *
205          * @param int    $item_id
206          * @param string $tag_str
207          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
208          */
209         public static function insertFromTagFieldByItemId($item_id, $tag_str)
210         {
211                 $profile_base = System::baseUrl();
212                 $profile_data = parse_url($profile_base);
213                 $profile_path = defaults($profile_data, 'path', '');
214                 $profile_base_friendica = $profile_data['host'] . $profile_path . '/profile/';
215                 $profile_base_diaspora = $profile_data['host'] . $profile_path . '/u/';
216
217                 $fields = ['guid', 'uid', 'id', 'edited', 'deleted', 'created', 'received', 'title', 'body', 'parent'];
218                 $item = Item::selectFirst($fields, ['id' => $item_id]);
219                 if (!DBA::isResult($item)) {
220                         return;
221                 }
222
223                 $item['tag'] = $tag_str;
224
225                 // Clean up all tags
226                 self::deleteByItemId($item_id);
227
228                 if ($item['deleted']) {
229                         return;
230                 }
231
232                 $taglist = explode(',', $item['tag']);
233
234                 $tags_string = '';
235                 foreach ($taglist as $tag) {
236                         if (Strings::startsWith($tag, self::TAG_CHARACTER)) {
237                                 $tags_string .= ' ' . trim($tag);
238                         } else {
239                                 $tags_string .= ' #' . trim($tag);
240                         }
241                 }
242
243                 $data = ' ' . $item['title'] . ' ' . $item['body'] . ' ' . $tags_string . ' ';
244
245                 // ignore anything in a code block
246                 $data = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $data);
247
248                 $tags = [];
249
250                 $pattern = '/\W\#([^\[].*?)[\s\'".,:;\?!\[\]\/]/ism';
251                 if (preg_match_all($pattern, $data, $matches)) {
252                         foreach ($matches[1] as $match) {
253                                 $tags['#' . $match] = '';
254                         }
255                 }
256
257                 $pattern = '/\W([\#@!%])\[url\=(.*?)\](.*?)\[\/url\]/ism';
258                 if (preg_match_all($pattern, $data, $matches, PREG_SET_ORDER)) {
259                         foreach ($matches as $match) {
260
261                                 if (in_array($match[1], [
262                                         self::TAG_CHARACTER[self::MENTION],
263                                         self::TAG_CHARACTER[self::IMPLICIT_MENTION],
264                                         self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]
265                                 ])) {
266                                         $contact = Contact::getDetailsByURL($match[2], 0);
267                                         if (!empty($contact['addr'])) {
268                                                 $match[3] = $contact['addr'];
269                                         }
270
271                                         if (!empty($contact['url'])) {
272                                                 $match[2] = $contact['url'];
273                                         }
274                                 }
275
276                                 $tags[$match[2]] = $match[1] . trim($match[3], ',.:;[]/\"?!');
277                         }
278                 }
279
280                 foreach ($tags as $link => $tag) {
281                         if (self::isType($tag, self::HASHTAG)) {
282                                 // try to ignore #039 or #1 or anything like that
283                                 if (ctype_digit(substr(trim($tag), 1))) {
284                                         continue;
285                                 }
286
287                                 // try to ignore html hex escapes, e.g. #x2317
288                                 if ((substr(trim($tag), 1, 1) == 'x' || substr(trim($tag), 1, 1) == 'X') && ctype_digit(substr(trim($tag), 2))) {
289                                         continue;
290                                 }
291
292                                 $type = self::HASHTAG;
293                                 $term = substr($tag, 1);
294                                 $link = '';
295                         } elseif (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION)) {
296                                 if (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION)) {
297                                         $type = self::MENTION;
298                                 } else {
299                                         $type = self::IMPLICIT_MENTION;
300                                 }
301
302                                 $contact = Contact::getDetailsByURL($link, 0);
303                                 if (!empty($contact['name'])) {
304                                         $term = $contact['name'];
305                                 } else {
306                                         $term = substr($tag, 1);
307                                 }
308                         } else { // This shouldn't happen
309                                 $type = self::HASHTAG;
310                                 $term = $tag;
311                                 $link = '';
312
313                                 Logger::notice('Unknown term type', ['tag' => $tag]);
314                         }
315
316                         if (DBA::exists('term', ['uid' => $item['uid'], 'otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'term' => $term, 'type' => $type])) {
317                                 continue;
318                         }
319
320                         if ($item['uid'] == 0) {
321                                 $global = true;
322                                 DBA::update('term', ['global' => true], ['otype' => self::OBJECT_TYPE_POST, 'guid' => $item['guid']]);
323                         } else {
324                                 $global = DBA::exists('term', ['uid' => 0, 'otype' => self::OBJECT_TYPE_POST, 'guid' => $item['guid']]);
325                         }
326
327                         DBA::insert('term', [
328                                 'uid'      => $item['uid'],
329                                 'oid'      => $item_id,
330                                 'otype'    => self::OBJECT_TYPE_POST,
331                                 'type'     => $type,
332                                 'term'     => $term,
333                                 'url'      => $link,
334                                 'guid'     => $item['guid'],
335                                 'created'  => $item['created'],
336                                 'received' => $item['received'],
337                                 'global'   => $global
338                         ]);
339
340                         // Search for mentions
341                         if (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION)
342                                 && (
343                                         strpos($link, $profile_base_friendica) !== false
344                                         || strpos($link, $profile_base_diaspora) !== false
345                                 )
346                         ) {
347                                 $users_stmt = DBA::p("SELECT `uid` FROM `contact` WHERE self AND (`url` = ? OR `nurl` = ?)", $link, $link);
348                                 $users = DBA::toArray($users_stmt);
349                                 foreach ($users AS $user) {
350                                         if ($user['uid'] == $item['uid']) {
351                                                 /// @todo This function is called from Item::update - so we mustn't call that function here
352                                                 DBA::update('item', ['mention' => true], ['id' => $item_id]);
353                                                 DBA::update('thread', ['mention' => true], ['iid' => $item['parent']]);
354                                         }
355                                 }
356                         }
357                 }
358         }
359
360         /**
361          * Inserts new terms for the provided item ID based on the legacy item.file field BBCode content.
362          * Deletes all previous file terms for the same item ID.
363          *
364          * @param integer $item_id item id
365          * @param         $files
366          * @return void
367          * @throws \Exception
368          */
369         public static function insertFromFileFieldByItemId($item_id, $files)
370         {
371                 $message = Item::selectFirst(['uid', 'deleted'], ['id' => $item_id]);
372                 if (!DBA::isResult($message)) {
373                         return;
374                 }
375
376                 // Clean up all tags
377                 DBA::delete('term', ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => [self::FILE, self::CATEGORY]]);
378
379                 if ($message["deleted"]) {
380                         return;
381                 }
382
383                 $message['file'] = $files;
384
385                 if (preg_match_all("/\[(.*?)\]/ism", $message["file"], $files)) {
386                         foreach ($files[1] as $file) {
387                                 DBA::insert('term', [
388                                         'uid' => $message["uid"],
389                                         'oid' => $item_id,
390                                         'otype' => self::OBJECT_TYPE_POST,
391                                         'type' => self::FILE,
392                                         'term' => $file
393                                 ]);
394                         }
395                 }
396
397                 if (preg_match_all("/\<(.*?)\>/ism", $message["file"], $files)) {
398                         foreach ($files[1] as $file) {
399                                 DBA::insert('term', [
400                                         'uid' => $message["uid"],
401                                         'oid' => $item_id,
402                                         'otype' => self::OBJECT_TYPE_POST,
403                                         'type' => self::CATEGORY,
404                                         'term' => $file
405                                 ]);
406                         }
407                 }
408         }
409
410         /**
411          * Sorts an item's tags into mentions, hashtags and other tags. Generate personalized URLs by user and modify the
412          * provided item's body with them.
413          *
414          * @param array $item
415          * @return array
416          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
417          * @throws \ImagickException
418          */
419         public static function populateTagsFromItem(&$item)
420         {
421                 $return = [
422                         'tags' => [],
423                         'hashtags' => [],
424                         'mentions' => [],
425                         'implicit_mentions' => [],
426                 ];
427
428                 $searchpath = System::baseUrl() . "/search?tag=";
429
430                 $taglist = DBA::select(
431                         'term',
432                         ['type', 'term', 'url'],
433                         ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item['id'], 'type' => [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION]],
434                         ['order' => ['tid']]
435                 );
436                 while ($tag = DBA::fetch($taglist)) {
437                         if ($tag['url'] == '') {
438                                 $tag['url'] = $searchpath . rawurlencode($tag['term']);
439                         }
440
441                         $orig_tag = $tag['url'];
442
443                         $prefix = self::TAG_CHARACTER[$tag['type']];
444                         switch($tag['type']) {
445                                 case self::HASHTAG:
446                                         if ($orig_tag != $tag['url']) {
447                                                 $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']);
448                                         }
449
450                                         $return['hashtags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank">' . $tag['term'] . '</a>';
451                                         $return['tags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank">' . $tag['term'] . '</a>';
452                                         break;
453                                 case self::MENTION:
454                                         $tag['url'] = Contact::magicLink($tag['url']);
455                                         $return['mentions'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank">' . $tag['term'] . '</a>';
456                                         $return['tags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank">' . $tag['term'] . '</a>';
457                                         break;
458                                 case self::IMPLICIT_MENTION:
459                                         $return['implicit_mentions'][] = $prefix . $tag['term'];
460                                         break;
461                         }
462                 }
463                 DBA::close($taglist);
464
465                 return $return;
466         }
467
468         /**
469          * Delete tags of the specific type(s) from an item
470          *
471          * @param int       $item_id
472          * @param int|array $type
473          * @throws \Exception
474          */
475         public static function deleteByItemId($item_id, $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION])
476         {
477                 if (empty($item_id)) {
478                         return;
479                 }
480
481                 // Clean up all tags
482                 DBA::delete('term', ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => $type]);
483         }
484
485         /**
486          * Check if the provided tag is of one of the provided term types.
487          *
488          * @param string $tag
489          * @param int    ...$types
490          * @return bool
491          */
492         public static function isType($tag, ...$types)
493         {
494                 $tag_chars = [];
495                 foreach ($types as $type) {
496                         if (array_key_exists($type, self::TAG_CHARACTER)) {
497                                 $tag_chars[] = self::TAG_CHARACTER[$type];
498                         }
499                 }
500
501                 return Strings::startsWith($tag, $tag_chars);
502         }
503 }