3 * @copyright Copyright (C) 2010-2023, the Friendica project
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 namespace Friendica\Content;
24 use Friendica\Core\Hook;
25 use Friendica\Core\Logger;
27 use Friendica\Network\HTTPException;
28 use Friendica\Util\ParseUrl;
29 use Friendica\Util\Strings;
32 * Extracts trailing URLs from post bodies and transforms them into enriched attachment tags through a Site Info query
/**
 * Looks up a relevant URL in the body and, when page information could be
 * fetched for it, returns the body with that information appended.
 *
 * The body is returned unchanged when no relevant URL is found or when the
 * query yields no data.
 *
 * @param string $body            Post body (BBCode)
 * @param bool   $searchNakedUrls Passed on to getRelevantUrlFromBody()
 * @param bool   $no_photos       Passed on to appendDataToBody()
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function searchAndAppendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false): string
{
    Logger::debug('add_page_info_to_body: fetch page info for body', ['body' => $body]);

    // getRelevantUrlFromBody() yields null when nothing matches, while
    // queryUrl() only accepts a string.
    $url = self::getRelevantUrlFromBody($body, $searchNakedUrls);
    if (!$url) {
        return $body;
    }

    // appendDataToBody() requires a non-empty array.
    $data = self::queryUrl($url);
    if (!is_array($data) || !$data) {
        return $body;
    }

    return self::appendDataToBody($body, $data, $no_photos);
}
/**
 * Adds the page information in $data to the body.
 *
 * When the body already contains an [attachment] tag, the link is inserted
 * as a [bookmark] tag right before it. Otherwise the trailing URL is stripped
 * from the body and the attachment footer is appended on a new line.
 *
 * @param string $body      Post body (BBCode)
 * @param array  $data      Page information; 'url' is read, 'title' is optional
 * @param bool   $no_photos Passed on to getFooterFromData()
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function appendDataToBody(string $body, array $data, bool $no_photos = false): string
{
    // Only one [attachment] tag per body is allowed
    $existingAttachmentPos = strpos($body, '[attachment');

    if ($existingAttachmentPos === false) {
        $footer = self::getFooterFromData($data, $no_photos);
        $body   = self::stripTrailingUrlFromBody($body, $data['url']);

        return $body . "\n" . $footer;
    }

    // empty() rather than ?: so that a missing 'title' key falls back to the
    // URL without raising an "undefined index" warning.
    $linkTitle = !empty($data['title']) ? $data['title'] : $data['url'];

    // Additional link attachments are prepended before the existing [attachment] tag
    return substr_replace($body, "\n[bookmark=" . $data['url'] . ']' . $linkTitle . "[/bookmark]\n", $existingAttachmentPos, 0);
}
/**
 * Queries the page information of a URL and renders it as an attachment footer.
 *
 * @param string $url
 * @param bool   $no_photos        Passed on to getFooterFromData()
 * @param string $photo            Passed on to queryUrl()
 * @param bool   $keywords         Passed on to queryUrl()
 * @param string $keyword_denylist Passed on to queryUrl()
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function getFooterFromUrl(string $url, bool $no_photos = false, string $photo = '', bool $keywords = false, string $keyword_denylist = ''): string
{
    return self::getFooterFromData(
        self::queryUrl($url, $photo, $keywords, $keyword_denylist),
        $no_photos
    );
}
/**
 * Renders page information as an [attachment] BBCode tag, followed by
 * hashtag links for its keywords.
 *
 * The 'page_info_data' hook is called first with $data. An empty string is
 * returned when the type is empty, when it is not one of link/video/photo,
 * when the title equals the URL, or when $no_photos is set and the type is
 * photo.
 *
 * @param array $data      Page information; keys read: type, url, title, text,
 *                         images, keywords and the alternative_title,
 *                         publisher_* and author_* fields
 * @param bool  $no_photos Whether to return nothing for the "photo" type
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function getFooterFromData(array $data, bool $no_photos = false): string
{
    Hook::callAll('page_info_data', $data);

    if (empty($data['type'])) {
        return '';
    }

    // It maybe is a rich content, but if it does have everything that a link has,
    // then treat it that way
    if (($data['type'] == 'rich') && is_string($data['title'] ?? null) &&
        is_string($data['text'] ?? null) && !empty($data['images'])) {
        $data['type'] = 'link';
    }

    // Normalise the optional keys that are read unconditionally below
    $data['title'] = $data['title'] ?? '';
    $data['url']   = $data['url'] ?? '';

    if (!in_array($data['type'], ['link', 'video', 'photo'], true) || ($data['title'] == $data['url'])) {
        return '';
    }

    if ($no_photos && ($data['type'] == 'photo')) {
        return '';
    }

    // Escape some bad characters: square brackets would otherwise be read as
    // BBCode, so they are emitted as numeric HTML entities.
    $escapeBrackets = static function ($value) {
        return str_replace(['[', ']'], ['&#91;', '&#93;'], $value);
    };

    $text = "[attachment";

    foreach (['type', 'url', 'title', 'alternative_title', 'publisher_name', 'publisher_url', 'publisher_img', 'author_name', 'author_url', 'author_img'] as $field) {
        if (!empty($data[$field])) {
            $text .= " " . $field . "='" . $escapeBrackets(htmlentities($data[$field], ENT_QUOTES, 'UTF-8', false)) . "'";
        }
    }

    if (empty($data['text'])) {
        $data['text'] = '';
    }

    // Only embed a picture link when it seems to be a valid picture ("width" is set)
    $image = $data['images'][0] ?? [];
    if (!empty($image['width'])) {
        $preview = $escapeBrackets(htmlentities($image['src'] ?? '', ENT_QUOTES, 'UTF-8', false));

        // if the preview picture is larger than 500 pixels then show it in a larger mode
        // But only, if the picture isn't higher than large (To prevent huge posts)
        if (!DI::config()->get('system', 'always_show_preview') && ($image['width'] >= 500)
            && ($image['width'] >= ($image['height'] ?? 0))) {
            $text .= " image='" . $preview . "'";
        } else {
            $text .= " preview='" . $preview . "'";

            // A small preview needs some text next to it: fall back to the
            // title, then to the URL.
            if (empty($data['text'])) {
                $data['text'] = $data['title'];
            }

            if (empty($data['text'])) {
                $data['text'] = $data['url'];
            }
        }
    }

    $text .= ']' . $escapeBrackets($data['text']) . '[/attachment]';

    // Always defined, so the return below is safe when there are no keywords
    $hashtags = '';
    if (!empty($data['keywords'])) {
        $hashtags = "\n";
        foreach ($data['keywords'] as $keyword) {
            /// @TODO make a positive list of allowed characters
            $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);
            $hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] ';
        }
    }

    return $text . $hashtags;
}
/**
 * Fetches the cached site information for a URL and adjusts it to the
 * caller's options.
 *
 * @param string $url
 * @param string $photo            When not empty, overrides the source of the first image
 * @param bool   $keywords         Whether to keep the 'keywords' entry
 * @param string $keyword_denylist Comma-separated keywords to remove from the 'keywords' entry
 * @return array|bool The value obtained from ParseUrl::getSiteinfoCached(), adjusted as described
 *                    (NOTE(review): presumably always an array — confirm against ParseUrl)
 * @throws HTTPException\InternalServerErrorException
 */
public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
{
    $data = ParseUrl::getSiteinfoCached($url);

    if ($photo != '') {
        $data['images'][0]['src'] = $photo;
    }

    if (!$keywords) {
        unset($data['keywords']);
    } elseif ($keyword_denylist && !empty($data['keywords']) && is_array($data['keywords'])) {
        // Split on the bare comma and trim each entry, so that both "a, b"
        // and "a,b" are understood; empty entries are dropped.
        $denied = array_filter(array_map('trim', explode(',', $keyword_denylist)), 'strlen');

        // array_diff() compares string representations, removes every
        // occurrence of a denied keyword and keeps the remaining keys.
        $data['keywords'] = array_diff($data['keywords'], $denied);
    }

    Logger::debug('fetch page info for URL', ['url' => $url, 'data' => $data]);

    return $data;
}
/**
 * Returns the keywords of a URL's page information, with spaces and the
 * characters + / . # ' removed from each of them.
 *
 * @param string $url
 * @param string $photo            Passed on to queryUrl()
 * @param string $keyword_denylist Passed on to queryUrl()
 * @return array Cleaned-up keywords; empty when the page information has none
 * @throws HTTPException\InternalServerErrorException
 */
public static function getTagsFromUrl(string $url, string $photo = '', string $keyword_denylist = ''): array
{
    $siteInfo = self::queryUrl($url, $photo, true, $keyword_denylist);
    if (empty($siteInfo['keywords'])) {
        return [];
    }

    $unwanted = [' ', '+', '/', '.', '#', "'"];

    $tags = [];
    foreach ($siteInfo['keywords'] as $rawKeyword) {
        $tags[] = str_replace($unwanted, '', $rawKeyword);
    }

    return $tags;
}
/**
 * Picks a non-hashtag, non-mention, schemeful URL from the provided body string to be converted into Page Info.
 *
 * Candidates are tried in this order, and the first hit wins:
 *  1. the first pure link ([url]…[/url]),
 *  2. the first link with a description ([url=…]…[/url]),
 *  3. only when $searchNakedUrls is set: a naked URL, accepted only if the body ends with it.
 *
 * @param string $body
 * @param bool   $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort
 * @return string|null The URL, or null when no candidate was found
 */
public static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false): ?string
{
    $URLSearchString = 'https?://[^\[\]]*';

    // preg_replace() returns null on a PCRE error; keep the previous body in that case.

    // Fix for Mastodon where the mentions are in a different format
    $body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body) ?? $body;

    // Remove all hashtags and mentions
    $body = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $body) ?? $body;

    // Search for pure links
    if (preg_match("/\[url\](https?:.*?)\[\/url\]/ism", $body, $matches)) {
        return $matches[1];
    }

    // Search for links with descriptions
    if (preg_match("/\[url\=(https?:.*?)\].*?\[\/url\]/ism", $body, $matches)) {
        return $matches[1];
    }

    // Last resort: a naked URL, but only when it is the very end of the body
    if ($searchNakedUrls
        && preg_match(Strings::autoLinkRegEx(), $body, $matches)
        && isset($matches[1])
        && Strings::endsWith($body, $matches[1])) {
        return $matches[1];
    }

    return null;
}
278 * Remove the provided URL from the body if it is at the end of it.
279 * Keep the link label if it isn't the full URL or a shortened version of it.
281 * @param string $body
285 protected static function stripTrailingUrlFromBody(string $body, string $url): string
287 $quotedUrl = preg_quote($url, '#');
288 $body = preg_replace_callback("#(?:
289 \[url]$quotedUrl\[/url]|
290 \[url=$quotedUrl]$quotedUrl\[/url]|
291 \[url=$quotedUrl]([^[]*?)\[/url]|
293 )$#isx", function ($match) use ($url) {
294 // Stripping URLs with no label
295 if (empty($match[1])) {
299 // Stripping link labels that include a shortened version of the URL
300 $trimMatch = trim($match[1], '.…');
301 if (!empty($trimMatch) && strpos($url, $trimMatch) !== false) {
305 // Keep all other labels