]> git.mxchange.org Git - friendica.git/blob - src/Model/ItemContent.php
Issue 9231: Speed up full text search
[friendica.git] / src / Model / ItemContent.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2020, Friendica
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Model;
23
24 use Friendica\Content\Text;
25 use Friendica\Content\Text\BBCode;
26 use Friendica\Core\Protocol;
27 use Friendica\Database\DBA;
28 use Friendica\DI;
29
class ItemContent
{
	/**
	 * Number of characters a link is assumed to occupy after the target
	 * network applied its own link shortening (see the Twitter note in
	 * getPlaintextPost()).
	 */
	const SHORTENED_LINK_LENGTH = 23;

	/**
	 * Search the item bodies via full text search and return the matching "uri-id" values
	 *
	 * The search expression is matched in boolean mode against `item-content`.`body`.
	 * Private items are only part of the result when they belong to the given user.
	 *
	 * @param string $search Search expression that is handed to MATCH ... AGAINST
	 * @param int    $uid    Id of the user whose private items may be returned
	 * @param int    $start  First element of the "limit" parameter of the query
	 * @param int    $limit  Second element of the "limit" parameter of the query
	 *
	 * @return array List of the "uri-id" values of the matching items
	 */
	public static function getURIIdListBySearch(string $search, int $uid = 0, int $start = 0, int $limit = 100): array
	{
		$condition = ["`uri-id` IN (SELECT `uri-id` FROM `item-content` WHERE MATCH (`body`) AGAINST (? IN BOOLEAN MODE))
			AND (NOT `private` OR (`private` AND `uid` = ?))", $search, $uid];
		$params = [
			'order' => ['uri-id' => true],
			'group_by' => ['uri-id'],
			'limit' => [$start, $limit]
		];

		$items = DBA::select('item', ['uri-id'], $condition, $params);

		$uriids = [];
		while ($item = DBA::fetch($items)) {
			$uriids[] = $item['uri-id'];
		}
		DBA::close($items);

		return $uriids;
	}

	/**
	 * Convert a message into plaintext for connectors to other networks
	 *
	 * @param array  $item           The message array that is about to be posted
	 * @param int    $limit          The maximum number of characters when posting to that network (0 = no limit)
	 * @param bool   $includedlinks  Has an attached link to be included into the message?
	 * @param int    $htmlmode       This controls the behavior of the BBCode conversion
	 * @param string $target_network Name of the network where the post should go to.
	 *
	 * @return array Same array structure as \Friendica\Content\Text\BBCode::getAttachedData
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
	 * @see   \Friendica\Content\Text\BBCode::getAttachedData
	 *
	 */
	public static function getPlaintextPost($item, $limit = 0, $includedlinks = false, $htmlmode = BBCode::API, $target_network = ''): array
	{
		// Strip the [url] markup from hashtags and mentions, only "#tag" / "@name" is kept
		$body = preg_replace('/([#@])\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '$1$3', $item['body']);

		// Add an URL element if the text contains a raw link
		$body = preg_replace('/([^\]\=\'"]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism',
			'$1[url]$2[/url]', $body);

		// Remove the abstract
		$body = Text\BBCode::stripAbstract($body);

		// At first look at data that is attached via "type-..." stuff
		// This will hopefully replaced with a dedicated bbcode later
		$post = Text\BBCode::getAttachedData($body, $item);

		// Put the title in front of the text (or use it as the text when there is none)
		$title = $item['title'] ?? '';
		if ($title != '') {
			if ($post['text'] != '') {
				$post['text'] = trim($title . "\n\n" . $post['text']);
			} else {
				$post['text'] = trim($title);
			}
		}

		$abstract = '';

		// Fetch the abstract from the given target network
		if ($target_network != '') {
			$default_abstract = Text\BBCode::getAbstract($item['body']);
			$abstract = Text\BBCode::getAbstract($item['body'], $target_network);

			// If we post to a network with no limit we only fetch
			// an abstract exactly for this network
			if (($limit == 0) && ($abstract == $default_abstract)) {
				$abstract = '';
			}
		} else {
			// Try to guess the correct target network
			switch ($htmlmode) {
				case BBCode::TWITTER:
					$abstract = Text\BBCode::getAbstract($item['body'], Protocol::TWITTER);
					break;

				case BBCode::OSTATUS:
					$abstract = Text\BBCode::getAbstract($item['body'], Protocol::STATUSNET);
					break;

				default: // We don't know the exact target.
					// We fetch an abstract since there is a posting limit.
					if ($limit > 0) {
						$abstract = Text\BBCode::getAbstract($item['body']);
					}
			}
		}

		// An abstract replaces the text; a pure text post then links to the original post
		if ($abstract != '') {
			$post['text'] = $abstract;

			if ($post['type'] == 'text') {
				$post['type'] = 'link';
				$post['url'] = $item['plink'];
			}
		}

		$html = Text\BBCode::convert($post['text'] . ($post['after'] ?? ''), false, $htmlmode);
		$msg = Text\HTML::toPlaintext($html, 0, true);
		$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));

		$link = '';
		if ($includedlinks) {
			// Pick the link that belongs to the attachment type
			switch ($post['type']) {
				case 'link':
				case 'text':
				case 'video':
					$link = $post['url'] ?? '';
					break;

				case 'photo':
					$link = $post['image'] ?? '';
					break;
			}

			// Fall back to the attachment title and description when there is no text
			if (($msg == '') && isset($post['title'])) {
				$msg = trim($post['title']);
			}

			if (($msg == '') && isset($post['description'])) {
				$msg = trim($post['description']);
			}

			// If the link is already contained in the post, then it needn't be added again
			// But: if the link is beyond the limit, then it has to be added.
			if (($link != '') && (strpos($msg, $link) !== false)) {
				// The limit is a number of characters, so position and lengths
				// have to be measured in characters as well, not in bytes.
				$pos = iconv_strpos($msg, $link, 0, 'UTF-8');
				$link_length = iconv_strlen($link, 'UTF-8');
				$msg_length = iconv_strlen($msg, 'UTF-8');

				if (($pos === false) || ($link_length === false) || ($msg_length === false)) {
					// The text isn't valid UTF-8, byte offsets are the best we can do
					$pos = strpos($msg, $link);
					$link_length = strlen($link);
					$msg_length = strlen($msg);
				}

				// Will the text be shortened in the link?
				// Or is the link the last item in the post?
				if (($limit > 0) && ($pos < $limit)
					&& (($pos + self::SHORTENED_LINK_LENGTH > $limit) || ($pos + $link_length == $msg_length))) {
					$msg = trim(str_replace($link, '', $msg));
				} elseif (($limit == 0) || ($pos < $limit)) {
					// The limit has to be increased since it will be shortened - but not now
					// Only do it with Twitter
					if (($limit > 0) && ($link_length > self::SHORTENED_LINK_LENGTH) && ($htmlmode == BBCode::TWITTER)) {
						$limit = $limit - self::SHORTENED_LINK_LENGTH + $link_length;
					}

					$link = '';

					if ($post['type'] == 'text') {
						unset($post['url']);
					}
				}
			}
		}

		if ($limit > 0) {
			// Reduce multiple spaces
			// When posted to a network with limited space, we try to gain space where possible
			while (strpos($msg, '  ') !== false) {
				$msg = str_replace('  ', ' ', $msg);
			}

			// Twitter is using its own limiter, so we always assume that shortened links will have this length
			if (iconv_strlen($link, 'UTF-8') > 0) {
				$limit = $limit - self::SHORTENED_LINK_LENGTH;
			}

			if (iconv_strlen($msg, 'UTF-8') > $limit) {
				if (!isset($post['url'])) {
					// A link to the original post is added, so space has to be reserved for it
					$limit = $limit - self::SHORTENED_LINK_LENGTH;
					$post['url'] = $item['plink'];
				} elseif (($post['type'] == 'text')
					|| (strpos($item['body'], '[share') !== false)
					|| DI::pConfig()->get($item['uid'], 'system', 'no_intelligent_shortening')) {
					// The existing link is replaced with the link to the original post
					$post['url'] = $item['plink'];
				}
				$msg = Text\Plaintext::shorten($msg, $limit);
			}
		}

		$post['text'] = trim($msg);

		return $post;
	}
}