]> git.mxchange.org Git - friendica.git/blob - src/Content/Text/Plaintext.php
Posts per author/server on the community pages (#13764)
[friendica.git] / src / Content / Text / Plaintext.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2010-2023, the Friendica project
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Content\Text;
23
24 use Friendica\Core\Protocol;
25 use Friendica\DI;
26 use Friendica\Model\Photo;
27 use Friendica\Model\Post;
28 use Friendica\Util\Network;
29
class Plaintext
{
	// Assumed length of an URL when shortened via the network's own url shortener (e.g. Twitter)
	const URL_LENGTH = 23;

	/**
	 * Shortens a message so that it fits into the given character limit.
	 *
	 * With the user setting "simple_shortening" the trimmed text is cut to $limit characters,
	 * three more characters are removed and an ellipsis is appended.
	 *
	 * Otherwise the message is assembled line by line for as long as it fits. When already the
	 * first non-empty content (or the line following a reshare marker line) exceeds the limit,
	 * that content is cut the same way as described above.
	 *
	 * @param  string $msg   The message to shorten
	 * @param  int    $limit Maximum number of characters
	 * @param  int    $uid   User id used to look up the "simple_shortening" setting, 0 skips the lookup
	 * @return string The shortened message
	 *
	 * @todo For Twitter URLs aren't shortened, but they have to be calculated as if.
	 */
	public static function shorten(string $msg, int $limit, int $uid = 0): string
	{
		$ellipsis = html_entity_decode("&#x2026;", ENT_QUOTES, 'UTF-8');

		if (!empty($uid) && DI::pConfig()->get($uid, 'system', 'simple_shortening')) {
			return mb_substr(mb_substr(trim($msg), 0, $limit), 0, -3) . $ellipsis;
		}

		// The recycle symbol plus the following space occupy four bytes in UTF-8,
		// which is why the byte based substr() below compares the first four bytes.
		$recycle = html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8');

		$shortened = '';
		foreach (explode("\n", $msg) as $row => $line) {
			$candidate = trim($shortened . "\n" . $line);

			if (mb_strlen($candidate) <= $limit) {
				$shortened = $candidate;
				continue;
			}

			// Nothing collected so far, or only the header line of a reshared message?
			$is_reshare_header = ($row == 1) && (substr($shortened, 0, 4) == $recycle);
			if (($shortened == '') || $is_reshare_header) {
				$shortened = mb_substr(mb_substr($candidate, 0, $limit), 0, -3) . $ellipsis;
			}

			// The message is complete now. Continuing after a truncation would allow
			// a very short following line to be appended behind the ellipsis.
			break;
		}

		return $shortened;
	}

	/**
	 * Returns the byte positions of the provided boundaries, optionally skipping a number of first occurrences
	 *
	 * The closing boundary is searched starting at the position of the found opening boundary.
	 *
	 * @param string $text        Text to search
	 * @param string $open        Left boundary
	 * @param string $close       Right boundary
	 * @param int    $occurrences Number of first occurrences to skip, negative values are treated as 0
	 * @return boolean|array false when a boundary is missing, otherwise an array with the keys "start" and "end"
	 */
	public static function getBoundariesPosition($text, $open, $close, $occurrences = 0)
	{
		if ($occurrences < 0) {
			$occurrences = 0;
		}

		// Walk from one opening boundary to the next until the requested one is reached
		$start_pos = -1;
		for ($skipped = 0; $skipped <= $occurrences; $skipped++) {
			$start_pos = strpos($text, $open, $start_pos + 1);
			if ($start_pos === false) {
				return false;
			}
		}

		$end_pos = strpos($text, $close, $start_pos);
		if ($end_pos === false) {
			return false;
		}

		return ['start' => $start_pos, 'end' => $end_pos];
	}

	/**
	 * Convert a message into plaintext for connectors to other networks
	 *
	 * The returned array always contains the keys "type" and "text". Depending on the attached
	 * media it can additionally contain "url", "title", "description", "image",
	 * "image_description", "images" and "remote_images". With a limit above zero the key
	 * "parts" holds the complete message split into numbered chunks.
	 *
	 * @param array  $item           The message array that is about to be posted
	 * @param int    $limit          The maximum number of characters when posting to that network
	 * @param bool   $includedlinks  Has an attached link to be included into the message?
	 * @param int    $htmlmode       This controls the behavior of the BBCode conversion
	 *
	 * @return array Same array structure than \Friendica\Content\Text\BBCode::getAttachedData
	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
	 * @see   \Friendica\Content\Text\BBCode::getAttachedData
	 */
	public static function getPost(array $item, int $limit = 0, bool $includedlinks = false, int $htmlmode = BBCode::MASTODON_API)
	{
		// Fetch attached media information
		$post = self::getPostMedia($item);

		if (($item['title'] != '') && ($post['text'] != '')) {
			$post['text'] = trim($item['title'] . "\n\n" . $post['text']);
		} elseif ($item['title'] != '') {
			$post['text'] = trim($item['title']);
		}

		// Fetch the abstract from the given target network.
		// The default guarantees a defined value when no branch below assigns one.
		$abstract = '';
		switch ($htmlmode) {
			case BBCode::TWITTER:
				$abstract = BBCode::getAbstract($item['body'], Protocol::TWITTER);
				break;

			case BBCode::OSTATUS:
				$abstract = BBCode::getAbstract($item['body'], Protocol::STATUSNET);
				break;

			case BBCode::BLUESKY:
				$abstract = BBCode::getAbstract($item['body'], Protocol::BLUESKY);
				break;

			default: // We don't know the exact target.
				// We fetch an abstract since there is a posting limit.
				if ($limit > 0) {
					$abstract = BBCode::getAbstract($item['body']);
				}
		}

		if ($abstract != '') {
			$post['text'] = $abstract;

			// The abstract replaces the body, so a plain text post links to the original
			if ($post['type'] == 'text') {
				$post['type'] = 'link';
				$post['url'] = $item['plink'];
			}
		}

		$html = BBCode::convertForUriId($item['uri-id'], $post['text'] . ($post['after'] ?? ''), $htmlmode);
		$msg = HTML::toPlaintext($html, 0, true);
		$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));

		// Untouched copy of the message, used for the multi part split below
		$complete_msg = $msg;

		$link = '';
		if ($includedlinks) {
			switch ($post['type']) {
				case 'link':
				case 'video':
					$link = $post['url'];
					break;

				case 'text':
					$link = $post['url'] ?? '';
					break;

				case 'photo':
					$link = $post['image'];
					break;
			}

			if (($msg == '') && isset($post['title'])) {
				$msg = trim($post['title']);
			}

			if (($msg == '') && isset($post['description'])) {
				$msg = trim($post['description']);
			}

			// If the link is already contained in the post, then it doesn't need to be added again
			// But: if the link is beyond the limit, then it has to be added.
			if ($link != '') {
				// Character based position, since it is compared with character based lengths
				$pos = mb_strpos($msg, $link);

				if ($pos !== false) {
					$link_is_cut  = ($pos + self::URL_LENGTH > $limit);
					$link_is_last = ($pos + mb_strlen($link) == mb_strlen($msg));

					// Will the text be shortened in the link?
					// Or is the link the last item in the post?
					if (($limit > 0) && ($pos < $limit) && ($link_is_cut || $link_is_last)) {
						$msg = trim(str_replace($link, '', $msg));
					} elseif (($limit == 0) || ($pos < $limit)) {
						// The limit has to be increased since it will be shortened - but not now
						// Only do it with Twitter
						if (($limit > 0) && (mb_strlen($link) > self::URL_LENGTH) && ($htmlmode == BBCode::TWITTER)) {
							$limit = $limit - self::URL_LENGTH + mb_strlen($link);
						}

						$link = '';

						if ($post['type'] == 'text') {
							unset($post['url']);
						}
					}
				}
			}
		}

		if ($limit > 0) {
			// Reduce multiple spaces
			// When posted to a network with limited space, we try to gain space where possible
			while (strpos($msg, '  ') !== false) {
				$msg = str_replace('  ', ' ', $msg);
			}

			if (!in_array($link, ['', $item['plink']]) && ($post['type'] != 'photo') && (strpos($complete_msg, $link) === false)) {
				$complete_msg .= "\n" . $link;
			}

			$post['parts'] = self::getParts(trim($complete_msg), $limit);

			// Twitter is using its own limiter, so we always assume that shortened links will have this length
			if (mb_strlen($link) > 0) {
				$limit = $limit - self::URL_LENGTH;
			}

			if (mb_strlen($msg) > $limit) {
				// The message gets cut, so the post has to point to the complete original
				if (($post['type'] == 'text') && isset($post['url'])) {
					$post['url'] = $item['plink'];
				} elseif (!isset($post['url'])) {
					// The added link consumes space as well
					$limit = $limit - self::URL_LENGTH;
					$post['url'] = $item['plink'];
				} elseif (strpos($item['body'], '[share') !== false) {
					$post['url'] = $item['plink'];
				} elseif (DI::pConfig()->get($item['uid'], 'system', 'no_intelligent_shortening')) {
					$post['url'] = $item['plink'];
				}
				$msg = self::shorten($msg, $limit, $item['uid']);
			}
		}

		$post['text'] = trim($msg);

		return $post;
	}

	/**
	 * Split the message in parts
	 *
	 * The message is cut at spaces and line breaks only. When more than one part is created,
	 * every part gets a " (n/m)" counter appended.
	 *
	 * @param string  $message   The complete message
	 * @param integer $baselimit Maximum number of characters per part
	 * @return array List of message parts, a single (possibly empty) entry when no split is needed
	 */
	private static function getParts(string $message, int $baselimit): array
	{
		$parts = [];
		$part  = '';
		$limit = $baselimit;

		// Explicit comparison with the empty string: a truthiness check would treat a
		// remaining "0" as the end of the message and silently drop it.
		$message = trim($message);
		while ($message !== '') {
			$space   = strpos($message, ' ');
			$newline = strpos($message, "\n");

			if (($space === false) && ($newline === false)) {
				// No separator left, the remainder is the last word
				$word    = $message;
				$message = '';
			} else {
				if ($space === false) {
					$cut = $newline + 1;
				} elseif ($newline === false) {
					$cut = $space + 1;
				} else {
					$cut = min($space, $newline) + 1;
				}

				// The word keeps its trailing separator, the remainder is trimmed
				$word    = substr($message, 0, $cut);
				$message = trim(substr($message, $cut));
			}

			// URLs are counted with the assumed shortened length, so the limit of the
			// current part grows by the difference to the real length
			if (Network::isValidHttpUrl(trim($word))) {
				$limit += mb_strlen(trim($word)) - self::URL_LENGTH;
			}

			// The margin of eight characters presumably reserves room for the " (n/m)" counter.
			// A first part is only closed when the whole message doesn't fit into a single post.
			if ((mb_strlen($part . $word) > $limit - 8) && ($parts || (mb_strlen($part . $word . $message) > $limit))) {
				$parts[] = trim($part);
				$part    = '';
				$limit   = $baselimit;
			}
			$part .= $word;
		}
		$parts[] = trim($part);

		$total = count($parts);
		if ($total > 1) {
			foreach (array_keys($parts) as $key) {
				$parts[$key] .= ' (' . ($key + 1) . '/' . $total . ')';
			}
		}

		return $parts;
	}

	/**
	 * Fetch attached media to the post and simplify the body.
	 *
	 * The result starts as a post of the type "text". Attached images turn it into a "photo",
	 * attached audio, video or web pages turn it into a "link". Media of a quoted post
	 * ("quote-uri-id") are taken into account as well.
	 *
	 * @param array $item The item, the keys "body" and "uri-id" are read, "quote-uri-id" is optional
	 * @return array Post data with at least the keys "type" and "text"
	 */
	private static function getPostMedia(array $item): array
	{
		$post = ['type' => 'text', 'images' => [], 'remote_images' => []];

		// Remove mentions and hashtag links
		$URLSearchString = '^\[\]';
		$post['text'] = preg_replace("/([#!@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $item['body']);

		// Remove abstract
		$post['text'] = BBCode::stripAbstract($post['text']);
		// Remove attached links
		$post['text'] = BBCode::removeAttachment($post['text']);
		// Remove any links
		$post['text'] = Post\Media::removeFromBody($post['text']);

		$has_quote = !empty($item['quote-uri-id']) && ($item['quote-uri-id'] != $item['uri-id']);

		// Collect the images, separated by whether a photo id is known for the image URL
		$images = Post\Media::getByURIId($item['uri-id'], [Post\Media::IMAGE]);
		if ($has_quote) {
			$images = array_merge($images, Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::IMAGE]));
		}
		foreach ($images as $image) {
			$entry = ['url' => $image['url'], 'description' => $image['description']];

			$id = Photo::getIdForName($image['url']);
			if ($id) {
				$entry['id'] = $id;
				$post['images'][] = $entry;
			} else {
				$post['remote_images'][] = $entry;
			}
		}

		// Empty lists are not part of the result
		if (empty($post['images'])) {
			unset($post['images']);
		}

		if (empty($post['remote_images'])) {
			unset($post['remote_images']);
		}

		// The first image represents the post, images with a known photo id take precedence
		$first_image = $post['images'][0] ?? $post['remote_images'][0] ?? null;
		if (!empty($first_image)) {
			$post['type']              = 'photo';
			$post['image']             = $first_image['url'];
			$post['image_description'] = $first_image['description'];
		}

		// Look for audio or video links
		$media = Post\Media::getByURIId($item['uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO]);
		if ($has_quote) {
			$media = array_merge($media, Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO]));
		}

		foreach ($media as $medium) {
			if (in_array($medium['type'], [Post\Media::AUDIO, Post\Media::VIDEO])) {
				$post['type'] = 'link';
				$post['url']  = $medium['url'];
			}
		}

		// Look for an attached link
		$page = Post\Media::getByURIId($item['uri-id'], [Post\Media::HTML]);
		if (!empty($item['quote-uri-id']) && empty($page)) {
			$page = Post\Media::getByURIId($item['quote-uri-id'], [Post\Media::HTML]);
		}
		if (!empty($page)) {
			$post['type']          = 'link';
			$post['url']           = $page[0]['url'];
			$post['description']   = $page[0]['description'];
			$post['title']         = $page[0]['name'];

			// The page preview is only used when the post has no image of its own
			if (empty($post['image']) && !empty($page[0]['preview'])) {
				$post['image'] = $page[0]['preview'];
			}
		}

		return $post;
	}
}