]> git.mxchange.org Git - friendica.git/blob - src/Model/Term.php
Raw content is now stored with announce messages as well
[friendica.git] / src / Model / Term.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2020, Friendica
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Model;
23
24 use Friendica\Core\Cache\Duration;
25 use Friendica\Core\Logger;
26 use Friendica\Database\DBA;
27 use Friendica\DI;
28 use Friendica\Util\Strings;
29
30 /**
31  * Class Term
32  *
33  * This Model class handles term table interactions.
 * This table stores relevant terms related to posts, photos and searches, like hashtags, mentions and
35  * user-applied categories.
36  */
37 class Term
38 {
    // Term types. Methods of this class read and write them through the `type` column of the term table.
    const UNKNOWN           = 0;
    const HASHTAG           = 1;
    const MENTION           = 2;
    // In the legacy item.file string, CATEGORY terms are wrapped in <...> and FILE terms in [...].
    const CATEGORY          = 3;
    const PCATEGORY         = 4;
    const FILE              = 5;
    const SAVEDSEARCH       = 6;
    const CONVERSATION      = 7;
        /**
         * An implicit mention is a mention in a comment body that is redundant with the threading information.
         */
    const IMPLICIT_MENTION  = 8;
        /**
         * An exclusive mention transfers the ownership of the post to the target account, usually a forum.
         */
    const EXCLUSIVE_MENTION = 9;

    // Prefix character used in BBCode for each term type that has one.
    // Term types without an entry here have no prefix character.
    const TAG_CHARACTER = [
        self::HASHTAG           => '#',
        self::MENTION           => '@',
        self::IMPLICIT_MENTION  => '%',
        self::EXCLUSIVE_MENTION => '!',
    ];

    // Kinds of object a term can be attached to; used as values of the `otype` column.
    const OBJECT_TYPE_POST  = 1;
    const OBJECT_TYPE_PHOTO = 2;
65
66         /**
67          * Returns a list of the most frequent global hashtags over the given period
68          *
69          * @param int $period Period in hours to consider posts
70          * @return array
71          * @throws \Exception
72          */
73         public static function getGlobalTrendingHashtags(int $period, $limit = 10)
74         {
75                 $tags = DI::cache()->get('global_trending_tags');
76
77                 if (!$tags) {
78                         $tagsStmt = DBA::p("SELECT t.`term`, COUNT(*) AS `score`
79                                 FROM `term` t
80                                  JOIN `item` i ON i.`id` = t.`oid` AND i.`uid` = t.`uid`
81                                  JOIN `thread` ON `thread`.`iid` = i.`id`
82                                 WHERE `thread`.`visible`
83                                   AND NOT `thread`.`deleted`
84                                   AND NOT `thread`.`moderated`
85                                   AND `thread`.`private` = ?
86                                   AND t.`uid` = 0
87                                   AND t.`otype` = ?
88                                   AND t.`type` = ?
89                                   AND t.`term` != ''
90                                   AND i.`received` > DATE_SUB(NOW(), INTERVAL ? HOUR)
91                                 GROUP BY `term`
92                                 ORDER BY `score` DESC
93                                 LIMIT ?",
94                                 Item::PUBLIC,
95                                 Term::OBJECT_TYPE_POST,
96                                 Term::HASHTAG,
97                                 $period,
98                                 $limit
99                         );
100
101                         if (DBA::isResult($tagsStmt)) {
102                                 $tags = DBA::toArray($tagsStmt);
103                                 DI::cache()->set('global_trending_tags', $tags, Duration::HOUR);
104                         }
105                 }
106
107                 return $tags ?: [];
108         }
109
110         /**
111          * Returns a list of the most frequent local hashtags over the given period
112          *
113          * @param int $period Period in hours to consider posts
114          * @return array
115          * @throws \Exception
116          */
117         public static function getLocalTrendingHashtags(int $period, $limit = 10)
118         {
119                 $tags = DI::cache()->get('local_trending_tags');
120
121                 if (!$tags) {
122                         $tagsStmt = DBA::p("SELECT t.`term`, COUNT(*) AS `score`
123                                 FROM `term` t
124                                 JOIN `item` i ON i.`id` = t.`oid` AND i.`uid` = t.`uid`
125                                 JOIN `thread` ON `thread`.`iid` = i.`id`
126                                 WHERE `thread`.`visible`
127                                   AND NOT `thread`.`deleted`
128                                   AND NOT `thread`.`moderated`
129                                   AND `thread`.`private` = ?
130                                   AND `thread`.`wall`
131                                   AND `thread`.`origin`
132                                   AND t.`otype` = ?
133                                   AND t.`type` = ?
134                                   AND t.`term` != ''
135                                   AND i.`received` > DATE_SUB(NOW(), INTERVAL ? HOUR)
136                                 GROUP BY `term`
137                                 ORDER BY `score` DESC
138                                 LIMIT ?",
139                                 Item::PUBLIC,
140                                 Term::OBJECT_TYPE_POST,
141                                 Term::HASHTAG,
142                                 $period,
143                                 $limit
144                         );
145
146                         if (DBA::isResult($tagsStmt)) {
147                                 $tags = DBA::toArray($tagsStmt);
148                                 DI::cache()->set('local_trending_tags', $tags, Duration::HOUR);
149                         }
150                 }
151
152                 return $tags ?: [];
153         }
154
155         /**
156          * Generates the legacy item.tag field comma-separated BBCode string from an item ID.
157          * Includes only hashtags, implicit and explicit mentions.
158          *
159          * @param int $item_id
160          * @return string
161          * @throws \Exception
162          */
163         public static function tagTextFromItemId($item_id)
164         {
165                 $tag_list = [];
166                 $tags = self::tagArrayFromItemId($item_id, [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION]);
167                 foreach ($tags as $tag) {
168                         $tag_list[] = self::TAG_CHARACTER[$tag['type']] . '[url=' . $tag['url'] . ']' . $tag['term'] . '[/url]';
169                 }
170
171                 return implode(',', $tag_list);
172         }
173
174         /**
175          * Retrieves the terms from the provided type(s) associated with the provided item ID.
176          *
177          * @param int       $item_id
178          * @param int|array $type
179          * @return array
180          * @throws \Exception
181          */
182         public static function tagArrayFromItemId($item_id, $type = [self::HASHTAG, self::MENTION])
183         {
184                 $condition = ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => $type];
185                 $tags = DBA::select('term', ['type', 'term', 'url'], $condition);
186                 if (!DBA::isResult($tags)) {
187                         return [];
188                 }
189
190                 return DBA::toArray($tags);
191         }
192
193         /**
194          * Generates the legacy item.file field string from an item ID.
195          * Includes only file and category terms.
196          *
197          * @param int $item_id
198          * @return string
199          * @throws \Exception
200          */
201         public static function fileTextFromItemId($item_id)
202         {
203                 $file_text = '';
204                 $tags = self::tagArrayFromItemId($item_id, [self::FILE, self::CATEGORY]);
205                 foreach ($tags as $tag) {
206                         if ($tag['type'] == self::CATEGORY) {
207                                 $file_text .= '<' . $tag['term'] . '>';
208                         } else {
209                                 $file_text .= '[' . $tag['term'] . ']';
210                         }
211                 }
212
213                 return $file_text;
214         }
215
216         /**
217          * Inserts new terms for the provided item ID based on the legacy item.tag field BBCode content.
218          * Deletes all previous tag terms for the same item ID.
219          * Sets both the item.mention and thread.mentions field flags if a mention concerning the item UID is found.
220          *
221          * @param int    $item_id
222          * @param string $tag_str
223          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
224          */
225         public static function insertFromTagFieldByItemId($item_id, $tag_str)
226         {
227                 $profile_base = DI::baseUrl();
228                 $profile_data = parse_url($profile_base);
229                 $profile_path = $profile_data['path'] ?? '';
230                 $profile_base_friendica = $profile_data['host'] . $profile_path . '/profile/';
231                 $profile_base_diaspora = $profile_data['host'] . $profile_path . '/u/';
232
233                 $fields = ['guid', 'uid', 'id', 'edited', 'deleted', 'created', 'received', 'title', 'body', 'parent'];
234                 $item = Item::selectFirst($fields, ['id' => $item_id]);
235                 if (!DBA::isResult($item)) {
236                         return;
237                 }
238
239                 $item['tag'] = $tag_str;
240
241                 // Clean up all tags
242                 self::deleteByItemId($item_id);
243
244                 if ($item['deleted']) {
245                         return;
246                 }
247
248                 $taglist = explode(',', $item['tag']);
249
250                 $tags_string = '';
251                 foreach ($taglist as $tag) {
252                         if (Strings::startsWith($tag, self::TAG_CHARACTER)) {
253                                 $tags_string .= ' ' . trim($tag);
254                         } else {
255                                 $tags_string .= ' #' . trim($tag);
256                         }
257                 }
258
259                 $data = ' ' . $item['title'] . ' ' . $item['body'] . ' ' . $tags_string . ' ';
260
261                 // ignore anything in a code block
262                 $data = preg_replace('/\[code\](.*?)\[\/code\]/sm', '', $data);
263
264                 $tags = [];
265
266                 $pattern = '/\W\#([^\[].*?)[\s\'".,:;\?!\[\]\/]/ism';
267                 if (preg_match_all($pattern, $data, $matches)) {
268                         foreach ($matches[1] as $match) {
269                                 $tags['#' . $match] = '';
270                         }
271                 }
272
273                 $pattern = '/\W([\#@!%])\[url\=(.*?)\](.*?)\[\/url\]/ism';
274                 if (preg_match_all($pattern, $data, $matches, PREG_SET_ORDER)) {
275                         foreach ($matches as $match) {
276
277                                 if (in_array($match[1], [
278                                         self::TAG_CHARACTER[self::MENTION],
279                                         self::TAG_CHARACTER[self::IMPLICIT_MENTION],
280                                         self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]
281                                 ])) {
282                                         $contact = Contact::getDetailsByURL($match[2], 0);
283                                         if (!empty($contact['addr'])) {
284                                                 $match[3] = $contact['addr'];
285                                         }
286
287                                         if (!empty($contact['url'])) {
288                                                 $match[2] = $contact['url'];
289                                         }
290                                 }
291
292                                 $tags[$match[2]] = $match[1] . trim($match[3], ',.:;[]/\"?!');
293                         }
294                 }
295
296                 foreach ($tags as $link => $tag) {
297                         if (self::isType($tag, self::HASHTAG)) {
298                                 // try to ignore #039 or #1 or anything like that
299                                 if (ctype_digit(substr(trim($tag), 1))) {
300                                         continue;
301                                 }
302
303                                 // try to ignore html hex escapes, e.g. #x2317
304                                 if ((substr(trim($tag), 1, 1) == 'x' || substr(trim($tag), 1, 1) == 'X') && ctype_digit(substr(trim($tag), 2))) {
305                                         continue;
306                                 }
307
308                                 $type = self::HASHTAG;
309                                 $term = substr($tag, 1);
310                                 $link = '';
311                         } elseif (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION)) {
312                                 if (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION)) {
313                                         $type = self::MENTION;
314                                 } else {
315                                         $type = self::IMPLICIT_MENTION;
316                                 }
317
318                                 $contact = Contact::getDetailsByURL($link, 0);
319                                 if (!empty($contact['name'])) {
320                                         $term = $contact['name'];
321                                 } else {
322                                         $term = substr($tag, 1);
323                                 }
324                         } else { // This shouldn't happen
325                                 $type = self::HASHTAG;
326                                 $term = $tag;
327                                 $link = '';
328
329                                 Logger::notice('Unknown term type', ['tag' => $tag]);
330                         }
331
332                         if (DBA::exists('term', ['uid' => $item['uid'], 'otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'term' => $term, 'type' => $type])) {
333                                 continue;
334                         }
335
336                         if ($item['uid'] == 0) {
337                                 $global = true;
338                                 DBA::update('term', ['global' => true], ['otype' => self::OBJECT_TYPE_POST, 'guid' => $item['guid']]);
339                         } else {
340                                 $global = DBA::exists('term', ['uid' => 0, 'otype' => self::OBJECT_TYPE_POST, 'guid' => $item['guid']]);
341                         }
342
343                         DBA::insert('term', [
344                                 'uid'      => $item['uid'],
345                                 'oid'      => $item_id,
346                                 'otype'    => self::OBJECT_TYPE_POST,
347                                 'type'     => $type,
348                                 'term'     => $term,
349                                 'url'      => $link,
350                                 'guid'     => $item['guid'],
351                                 'created'  => $item['created'],
352                                 'received' => $item['received'],
353                                 'global'   => $global
354                         ]);
355
356                         // Search for mentions
357                         if (self::isType($tag, self::MENTION, self::EXCLUSIVE_MENTION)
358                                 && (
359                                         strpos($link, $profile_base_friendica) !== false
360                                         || strpos($link, $profile_base_diaspora) !== false
361                                 )
362                         ) {
363                                 $users_stmt = DBA::p("SELECT `uid` FROM `contact` WHERE self AND (`url` = ? OR `nurl` = ?)", $link, $link);
364                                 $users = DBA::toArray($users_stmt);
365                                 foreach ($users AS $user) {
366                                         if ($user['uid'] == $item['uid']) {
367                                                 /// @todo This function is called from Item::update - so we mustn't call that function here
368                                                 DBA::update('item', ['mention' => true], ['id' => $item_id]);
369                                                 DBA::update('thread', ['mention' => true], ['iid' => $item['parent']]);
370                                         }
371                                 }
372                         }
373                 }
374         }
375
376         /**
377          * Inserts new terms for the provided item ID based on the legacy item.file field BBCode content.
378          * Deletes all previous file terms for the same item ID.
379          *
380          * @param integer $item_id item id
381          * @param         $files
382          * @return void
383          * @throws \Exception
384          */
385         public static function insertFromFileFieldByItemId($item_id, $files)
386         {
387                 $message = Item::selectFirst(['uid', 'deleted'], ['id' => $item_id]);
388                 if (!DBA::isResult($message)) {
389                         return;
390                 }
391
392                 // Clean up all tags
393                 DBA::delete('term', ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => [self::FILE, self::CATEGORY]]);
394
395                 if ($message["deleted"]) {
396                         return;
397                 }
398
399                 $message['file'] = $files;
400
401                 if (preg_match_all("/\[(.*?)\]/ism", $message["file"], $files)) {
402                         foreach ($files[1] as $file) {
403                                 DBA::insert('term', [
404                                         'uid' => $message["uid"],
405                                         'oid' => $item_id,
406                                         'otype' => self::OBJECT_TYPE_POST,
407                                         'type' => self::FILE,
408                                         'term' => $file
409                                 ]);
410                         }
411                 }
412
413                 if (preg_match_all("/\<(.*?)\>/ism", $message["file"], $files)) {
414                         foreach ($files[1] as $file) {
415                                 DBA::insert('term', [
416                                         'uid' => $message["uid"],
417                                         'oid' => $item_id,
418                                         'otype' => self::OBJECT_TYPE_POST,
419                                         'type' => self::CATEGORY,
420                                         'term' => $file
421                                 ]);
422                         }
423                 }
424         }
425
426         /**
427          * Sorts an item's tags into mentions, hashtags and other tags. Generate personalized URLs by user and modify the
428          * provided item's body with them.
429          *
430          * @param array $item
431          * @return array
432          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
433          * @throws \ImagickException
434          */
435         public static function populateTagsFromItem(&$item)
436         {
437                 $return = [
438                         'tags' => [],
439                         'hashtags' => [],
440                         'mentions' => [],
441                         'implicit_mentions' => [],
442                 ];
443
444                 $searchpath = DI::baseUrl() . "/search?tag=";
445
446                 $taglist = DBA::select(
447                         'term',
448                         ['type', 'term', 'url'],
449                         ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item['id'], 'type' => [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION]],
450                         ['order' => ['tid']]
451                 );
452                 while ($tag = DBA::fetch($taglist)) {
453                         if ($tag['url'] == '') {
454                                 $tag['url'] = $searchpath . rawurlencode($tag['term']);
455                         }
456
457                         $orig_tag = $tag['url'];
458
459                         $prefix = self::TAG_CHARACTER[$tag['type']];
460                         switch($tag['type']) {
461                                 case self::HASHTAG:
462                                         if ($orig_tag != $tag['url']) {
463                                                 $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']);
464                                         }
465
466                                         $return['hashtags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank" rel="noopener noreferrer">' . htmlspecialchars($tag['term']) . '</a>';
467                                         $return['tags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank" rel="noopener noreferrer">' . htmlspecialchars($tag['term']) . '</a>';
468                                         break;
469                                 case self::MENTION:
470                                         $tag['url'] = Contact::magicLink($tag['url']);
471                                         $return['mentions'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank" rel="noopener noreferrer">' . htmlspecialchars($tag['term']) . '</a>';
472                                         $return['tags'][] = $prefix . '<a href="' . $tag['url'] . '" target="_blank" rel="noopener noreferrer">' . htmlspecialchars($tag['term']) . '</a>';
473                                         break;
474                                 case self::IMPLICIT_MENTION:
475                                         $return['implicit_mentions'][] = $prefix . $tag['term'];
476                                         break;
477                         }
478                 }
479                 DBA::close($taglist);
480
481                 return $return;
482         }
483
484         /**
485          * Delete tags of the specific type(s) from an item
486          *
487          * @param int       $item_id
488          * @param int|array $type
489          * @throws \Exception
490          */
491         public static function deleteByItemId($item_id, $type = [self::HASHTAG, self::MENTION, self::IMPLICIT_MENTION])
492         {
493                 if (empty($item_id)) {
494                         return;
495                 }
496
497                 // Clean up all tags
498                 DBA::delete('term', ['otype' => self::OBJECT_TYPE_POST, 'oid' => $item_id, 'type' => $type]);
499         }
500
501         /**
502          * Check if the provided tag is of one of the provided term types.
503          *
504          * @param string $tag
505          * @param int    ...$types
506          * @return bool
507          */
508         public static function isType($tag, ...$types)
509         {
510                 $tag_chars = [];
511                 foreach ($types as $type) {
512                         if (array_key_exists($type, self::TAG_CHARACTER)) {
513                                 $tag_chars[] = self::TAG_CHARACTER[$type];
514                         }
515                 }
516
517                 return Strings::startsWith($tag, $tag_chars);
518         }
519 }