3 * @copyright Copyright (C) 2020, Friendica
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 namespace Friendica\Content;
24 use Friendica\Core\Hook;
25 use Friendica\Core\Logger;
27 use Friendica\Network\HTTPException;
28 use Friendica\Util\ParseUrl;
29 use Friendica\Util\Strings;
32 * Extracts trailing URLs from post bodies to transform them into enriched attachment tags through a Site Info query
/**
 * Appends the page info footer of the URL found at the end of the body.
 *
 * The trailing URL is removed from the body and the footer is added on a new line.
 * The body is returned unchanged when no relevant URL is found or when no footer could be built for it.
 *
 * NOTE(review): both early returns and the final return are not part of the visible listing;
 * they are reconstructed from the nullable result of getRelevantUrlFromBody() — confirm against upstream.
 *
 * @param string $body
 * @param bool   $searchNakedUrls Handed to getRelevantUrlFromBody()
 * @param bool   $no_photos       Handed to getFooterFromUrl()
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function appendToBody(string $body, bool $searchNakedUrls = false, bool $no_photos = false)
{
	Logger::info('add_page_info_to_body: fetch page info for body', ['body' => $body]);

	// getRelevantUrlFromBody() may return null, which getFooterFromUrl(string $url) cannot accept
	$url = self::getRelevantUrlFromBody($body, $searchNakedUrls);
	if (!$url) {
		return $body;
	}

	// Without a footer there is nothing to replace the URL with, so the body is kept as it is
	$footer = self::getFooterFromUrl($url, $no_photos);
	if (!$footer) {
		return $body;
	}

	$body = self::stripTrailingUrlFromBody($body, $url);

	$body .= "\n" . $footer;

	return $body;
}
/**
 * Builds the page info footer for a URL.
 *
 * The page data is obtained through queryUrl() and rendered by getFooterFromData().
 *
 * @param string $url
 * @param bool   $no_photos        Handed to getFooterFromData()
 * @param string $photo            Handed to queryUrl()
 * @param bool   $keywords         Handed to queryUrl()
 * @param string $keyword_denylist Handed to queryUrl()
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function getFooterFromUrl(string $url, bool $no_photos = false, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
{
	return self::getFooterFromData(
		self::queryUrl($url, $photo, $keywords, $keyword_denylist),
		$no_photos
	);
}
/**
 * Builds the [attachment] BBCode footer, followed by optional hashtag links, from page data.
 *
 * An empty string is returned when the data has no type, when the type is none of "link", "video"
 * or "photo", when the title equals the URL, or when photos are excluded and the type is "photo".
 *
 * NOTE(review): the early-return values, the "else" between the image and preview attributes and the
 * two hashtag initialisations are not part of the visible listing and are reconstructed — confirm
 * against upstream.
 *
 * @param array $data      Page data; the keys read here are type, url, title, text, images and keywords
 * @param bool  $no_photos Whether data of type "photo" must not produce a footer
 * @return string
 * @throws HTTPException\InternalServerErrorException
 */
public static function getFooterFromData(array $data, bool $no_photos = false)
{
	Hook::callAll('page_info_data', $data);

	if (empty($data['type'])) {
		return '';
	}

	// It may be rich content, but if it has everything that a link has, then treat it that way.
	// The null fallbacks keep a missing title/text from raising an undefined index notice.
	if (($data['type'] == 'rich') && is_string($data['title'] ?? null)
		&& is_string($data['text'] ?? null) && !empty($data['images'])) {
		$data['type'] = 'link';
	}

	$data['title'] = $data['title'] ?? '';
	$data['url']   = $data['url'] ?? '';

	if (!in_array($data['type'], ['link', 'video', 'photo'], true) || ($data['title'] == $data['url'])) {
		return '';
	}

	if ($no_photos && ($data['type'] == 'photo')) {
		return '';
	}

	// Escape some bad characters: a literal "[" or "]" inside an attribute value would break the
	// surrounding [attachment] tag, so both are emitted as numeric entities
	$escape = static function ($value) {
		return str_replace(['[', ']'], ['&#91;', '&#93;'], htmlentities((string)$value, ENT_QUOTES, 'UTF-8', false));
	};

	$data['url']   = $escape($data['url']);
	$data['title'] = $escape($data['title']);

	$text = "[attachment type='" . $data['type'] . "'";

	// The tag content falls back to the title first and to the URL second
	if (empty($data['text'])) {
		$data['text'] = $data['title'];
	}

	if (empty($data['text'])) {
		$data['text'] = $data['url'];
	}

	if (!empty($data['url'])) {
		$text .= " url='" . $data['url'] . "'";
	}

	if (!empty($data['title'])) {
		$text .= " title='" . $data['title'] . "'";
	}

	// Only embed a picture link when it seems to be a valid picture ("width" is set)
	if (!empty($data['images']) && !empty($data['images'][0]['width'])) {
		$image   = $data['images'][0];
		$preview = $escape($image['src'] ?? '');

		// If the preview picture is at least 500 pixels wide then show it in a larger mode,
		// but only if the picture isn't higher than it is wide (to prevent huge posts)
		if (!DI::config()->get('system', 'always_show_preview') && ($image['width'] >= 500)
			&& ($image['width'] >= ($image['height'] ?? 0))) {
			$text .= " image='" . $preview . "'";
		} else {
			$text .= " preview='" . $preview . "'";
		}
	}

	$text .= ']' . $data['text'] . '[/attachment]';

	$hashtags = '';
	if (!empty($data['keywords']) && is_array($data['keywords'])) {
		$hashtags = "\n";
		foreach ($data['keywords'] as $keyword) {
			/// @TODO make a positive list of allowed characters
			$hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);
			$hashtags .= '#[url=' . DI::baseUrl() . '/search?tag=' . $hashtag . ']' . $hashtag . '[/url] ';
		}
	}

	return $text . $hashtags;
}
/**
 * Fetches the page data of a URL through ParseUrl::getSiteinfoCached() and adjusts it to the arguments.
 *
 * NOTE(review): the condition around the photo override, the "if (!$keywords)" branch head and the
 * final return are not part of the visible listing and are reconstructed — confirm against upstream.
 *
 * @param string $url
 * @param string $photo            When not empty, replaces the source of the first image
 * @param bool   $keywords         When false, the keywords are removed from the result
 * @param string $keyword_denylist List of keywords separated by ", " that are removed from the result
 * @return array
 * @throws HTTPException\InternalServerErrorException
 */
public static function queryUrl(string $url, string $photo = '', bool $keywords = false, string $keyword_denylist = '')
{
	$data = ParseUrl::getSiteinfoCached($url, true);

	if ($photo != '') {
		$data['images'][0]['src'] = $photo;
	}

	if (!$keywords) {
		unset($data['keywords']);
	} elseif ($keyword_denylist && !empty($data['keywords']) && is_array($data['keywords'])) {
		foreach (explode(', ', $keyword_denylist) as $denied) {
			// Strict search, so numeric-looking keywords such as "10" and "1e1" are not confused
			$index = array_search(trim($denied), $data['keywords'], true);
			if ($index !== false) {
				unset($data['keywords'][$index]);
			}
		}
	}

	Logger::info('fetch page info for URL', ['url' => $url, 'data' => $data]);

	return $data;
}
/**
 * Returns the keywords of a URL's page data as a list of tag names.
 *
 * The characters space, "+", "/", ".", "#" and "'" are removed from every keyword.
 *
 * NOTE(review): the initialisation of the list and the return statement are not part of the visible
 * listing and are reconstructed — confirm against upstream.
 *
 * @param string $url
 * @param string $photo            Handed to queryUrl()
 * @param string $keyword_denylist Handed to queryUrl()
 * @return array
 * @throws HTTPException\InternalServerErrorException
 */
public static function getTagsFromUrl(string $url, string $photo = '', string $keyword_denylist = '')
{
	$data = self::queryUrl($url, $photo, true, $keyword_denylist);

	// The page data does not necessarily carry keywords
	if (empty($data['keywords']) || !is_array($data['keywords'])) {
		return [];
	}

	$taglist = [];
	foreach ($data['keywords'] as $keyword) {
		$taglist[] = str_replace([' ', '+', '/', '.', '#', "'"], '', $keyword);
	}

	return $taglist;
}
/**
 * Picks a non-hashtag, non-mention, schemeful URL at the end of the provided body string to be converted into Page Info.
 *
 * A trailing [url]…[/url] tag is tried first, then a trailing [url=…]…[/url] tag and, only when
 * requested, a naked URL.
 *
 * NOTE(review): the "only if nothing matched yet" condition around the second pattern and the reset
 * of the rejected naked URL match are not part of the visible listing and are reconstructed — confirm
 * against upstream.
 *
 * @param string $body
 * @param bool   $searchNakedUrls Whether we should pick a naked URL (outside of BBCode tags) as a last resort
 * @return string|null
 */
protected static function getRelevantUrlFromBody(string $body, bool $searchNakedUrls = false)
{
	$URLSearchString = 'https?://[^\[\]]*';

	// Fix for Mastodon where the mentions are in a different format.
	// preg_replace() yields null on a PCRE error; the untouched body is used in that case.
	$body = preg_replace("~\[url=($URLSearchString)]([#!@])(.*?)\[/url]~is", '$2[url=$1]$3[/url]', $body) ?? $body;

	$matches = [];

	// The second pattern must not run after a successful first one, since preg_match() overwrites $matches
	if (!preg_match("~(?<![!#@])\[url]($URLSearchString)\[/url]$~is", $body, $matches)) {
		preg_match("~(?<![!#@])\[url=($URLSearchString)].*\[/url]$~is", $body, $matches);
	}

	if (!$matches && $searchNakedUrls) {
		preg_match('~(?<=\W|^)(?<![=\]])(https?://.+)$~is', $body, $matches);
		if ($matches && !Strings::endsWith($body, $matches[1])) {
			// A naked URL that isn't at the very end of the body is not used
			$matches = [];
		}
	}

	return $matches[1] ?? null;
}
250 * Remove the provided URL from the body if it is at the end of it.
251 * Keep the link label if it isn't the full URL.
253 * @param string $body
255 * @return string|string[]|null
257 protected static function stripTrailingUrlFromBody(string $body, string $url)
259 $quotedUrl = preg_quote($url, '#');
260 $body = preg_replace("#(?:
261 \[url]$quotedUrl\[/url]|
262 \[url=$quotedUrl]$quotedUrl\[/url]|
263 \[url=$quotedUrl]([^[]*?)\[/url]|
265 )$#isx", '$1', $body);