3 * @copyright Copyright (C) 2020, Friendica
5 * @license GNU AGPL version 3 or any later version
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22 use Friendica\Core\Hook;
23 use Friendica\Core\Logger;
24 use Friendica\Core\Protocol;
25 use Friendica\Core\Renderer;
26 use Friendica\Core\Session;
27 use Friendica\Database\DBA;
29 use Friendica\Model\Item;
30 use Friendica\Protocol\DFRN;
31 use Friendica\Protocol\Feed;
32 use Friendica\Protocol\OStatus;
33 use Friendica\Util\Network;
34 use Friendica\Util\ParseUrl;
35 use Friendica\Util\Strings;
37 require_once __DIR__ . '/../mod/share.php';
/**
 * Builds the "[attachment]" BBCode footer for a set of page information.
 *
 * The 'page_info_data' hook is called first so that hook handlers can alter $data.
 * Output is only produced for entries of type "link", "video" or "photo"; a "rich"
 * entry is treated as a link when it has a string title, a string text and images.
 *
 * @param array $data      Page information. Keys read here: type, url, title, text,
 *                         images (first entry: src, width, height) and keywords.
 * @param bool  $no_photos When true, entries of type "photo" produce no output.
 *
 * @return string The attachment BBCode, optionally followed by hashtag links, prefixed
 *                with a line break; an empty string when nothing should be attached.
 */
function add_page_info_data(array $data, $no_photos = false)
{
    Hook::callAll('page_info_data', $data);

    if (empty($data['type'])) {
        return '';
    }

    $type = $data['type'];

    // It maybe is a rich content, but if it does have everything that a link has,
    // then treat it that way
    if (($type == 'rich') && is_string($data['title'] ?? null)
        && is_string($data['text'] ?? null) && !empty($data['images'])) {
        $type = 'link';
    }

    // Default the optional fields so none of the checks below reads an undefined index.
    $title = $data['title'] ?? '';
    $url   = $data['url'] ?? '';

    if (!in_array($type, ['link', 'video', 'photo'], true) || ($title === $url)) {
        return '';
    }

    if ($no_photos && ($type === 'photo')) {
        return '';
    }

    // Escape some bad characters: HTML special characters become entities and square
    // brackets become numeric entities, so a value cannot terminate the BBCode tag.
    $escape = static function ($value) {
        return str_replace(
            ['[', ']'],
            ['&#91;', '&#93;'],
            htmlentities((string)$value, ENT_QUOTES, 'UTF-8', false)
        );
    };

    $url   = $escape($url);
    $title = $escape($title);

    // The visible text falls back to the title and then to the URL.
    $body = $data['text'] ?? '';
    if (empty($body)) {
        $body = $title;
    }

    if (empty($body)) {
        $body = $url;
    }

    $text = "[attachment type='" . $type . "'";

    if (!empty($url)) {
        $text .= " url='" . $url . "'";
    }

    if (!empty($title)) {
        $text .= " title='" . $title . "'";
    }

    // Only embed a picture link when it seems to be a valid picture ("width" is set)
    $image = $data['images'][0] ?? null;
    if (is_array($image) && !empty($image['width'])) {
        $preview = $escape($image['src'] ?? '');
        $width   = $image['width'];
        $height  = $image['height'] ?? 0;

        // If the preview picture is at least 500 pixels wide then show it in a larger mode,
        // but only if the picture isn't higher than wide (to prevent huge posts)
        if (!DI::config()->get('system', 'always_show_preview') && ($width >= 500) && ($width >= $height)) {
            $text .= " image='" . $preview . "'";
        } else {
            $text .= " preview='" . $preview . "'";
        }
    }

    $text .= ']' . $body . '[/attachment]';

    $hashtags = '';
    if (!empty($data['keywords']) && is_array($data['keywords'])) {
        $hashtags   = "\n";
        $searchBase = DI::baseUrl() . '/search?tag=';

        foreach ($data['keywords'] as $keyword) {
            if (!is_scalar($keyword)) {
                continue;
            }

            /// @TODO make a positive list of allowed characters
            $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', (string)$keyword);

            // A keyword that consisted only of stripped characters would give a dead tag link.
            if ($hashtag === '') {
                continue;
            }

            $hashtags .= '#[url=' . $searchBase . $hashtag . ']' . $hashtag . '[/url] ';
        }
    }

    return "\n" . $text . $hashtags;
}
/**
 * Fetches the cached page information for a URL and post-processes it.
 *
 * @param string $url              URL handed to ParseUrl::getSiteinfoCached().
 * @param string $photo            When not empty, stored as the source of the first image.
 * @param bool   $keywords         When false, a "keywords" entry is removed from the result.
 * @param string $keyword_denylist Comma separated keywords that are removed from the
 *                                 "keywords" entry; whitespace around entries is ignored.
 *
 * @return mixed The (possibly modified) value returned by ParseUrl::getSiteinfoCached().
 */
function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "")
{
    $data = ParseUrl::getSiteinfoCached($url, true);

    // Loose comparison on purpose: callers passing null are treated like the default "".
    if ($photo != "") {
        $data["images"][0]["src"] = $photo;
    }

    Logger::log('fetch page info for ' . $url . ' ' . print_r($data, true), Logger::DEBUG);

    if (!isset($data["keywords"])) {
        return $data;
    }

    if (!$keywords) {
        unset($data["keywords"]);
        return $data;
    }

    if (($keyword_denylist != "") && is_array($data["keywords"])) {
        // Split on the bare comma and trim afterwards so that "a,b" and "a, b" behave alike.
        $denied = array_filter(array_map('trim', explode(',', $keyword_denylist)), 'strlen');

        // Strict comparison; every occurrence of a denied keyword is dropped, keys are kept.
        $data["keywords"] = array_filter(
            $data["keywords"],
            static function ($keyword) use ($denied) {
                return !in_array($keyword, $denied, true);
            }
        );
    }

    return $data;
}
/**
 * Returns the keywords of a page as a list of hashtag-safe strings.
 *
 * The arguments are passed unchanged to query_page_info(); the characters
 * space, "+", "/", ".", "#" and "'" are stripped from every keyword.
 *
 * @param string $url              URL of the page.
 * @param string $photo            Passed to query_page_info().
 * @param bool   $keywords         Passed to query_page_info().
 * @param string $keyword_denylist Passed to query_page_info().
 *
 * @return array List of cleaned keywords; empty when the page data has no keyword array.
 */
function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "")
{
    $data = query_page_info($url, $photo, $keywords, $keyword_denylist);
    if (empty($data["keywords"]) || !is_array($data["keywords"])) {
        return [];
    }

    $taglist = [];
    foreach ($data['keywords'] as $keyword) {
        $taglist[] = str_replace([" ", "+", "/", ".", "#", "'"], '', $keyword);
    }

    return $taglist;
}
/**
 * Fetches the page information for a URL and renders it as attachment BBCode.
 *
 * Combines query_page_info() and add_page_info_data().
 *
 * @param string $url              URL of the page.
 * @param bool   $no_photos        Passed to add_page_info_data().
 * @param string $photo            Passed to query_page_info().
 * @param bool   $keywords         Passed to query_page_info().
 * @param string $keyword_denylist Passed to query_page_info().
 *
 * @return string Result of add_page_info_data(), or an empty string when
 *                query_page_info() did not return an array.
 */
function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "")
{
    $data = query_page_info($url, $photo, $keywords, $keyword_denylist);

    if (!is_array($data)) {
        return '';
    }

    return add_page_info_data($data, $no_photos);
}
/**
 * Appends the page information of the first link found in a BBCode body.
 *
 * The first "[url]...[/url]" or "[url=...]...[/url]" element that is not directly
 * preceded by "!", "#" or "@" is used. When $texturl is set, a plain http(s) URL is
 * used as fallback. If the generated footer contains the URL, the link is removed
 * from the body as long as the remaining text is still one contiguous part of it.
 *
 * @param string $body      BBCode body.
 * @param bool   $texturl   Also look for URLs that are not wrapped in BBCode.
 * @param bool   $no_photos Passed to add_page_info().
 *
 * @return string The body, with the footer from add_page_info() appended when one was created.
 */
function add_page_info_to_body($body, $texturl = false, $no_photos = false)
{
    Logger::log('add_page_info_to_body: fetch page info for body ' . $body, Logger::DEBUG);

    $URLSearchString = "^\[\]";

    // Fix for Mastodon where the mentions are in a different format
    $fixed = preg_replace("/\[url\=([$URLSearchString]*)\]([#!@])(.*?)\[\/url\]/ism",
        '$2[url=$1]$3[/url]', $body);
    if ($fixed !== null) {
        // preg_replace() yields null on a PCRE error; keep the untouched body in that case.
        $body = $fixed;
    }

    $matches = [];

    // The prepended space gives the "[^!#@]" guard a character to consume
    // when the link sits at the very beginning of the body.
    preg_match("/[^!#@]\[url\]([$URLSearchString]*)\[\/url\]/ism", " " . $body, $matches);

    if (!$matches) {
        preg_match("/[^!#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " " . $body, $matches);
    }

    // Convert urls without bbcode elements
    if (!$matches && $texturl) {
        preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches);

        // In this pattern the URL is the second group; move it to where the code below expects it.
        if ($matches) {
            $matches[1] = $matches[2];
        }
    }

    // Nothing usable found (no match at all, or an empty link target).
    if (empty($matches[1])) {
        return $body;
    }

    $url    = $matches[1];
    $footer = (string)add_page_info($url, $no_photos);

    if (trim($footer) === '') {
        return $body;
    }

    // Remove the link from the body if the link is attached at the end of the post
    if (strpos($footer, $url) !== false) {
        $removedlink = trim(str_replace($url, "", $body));
        if (($removedlink === "") || (strpos($body, $removedlink) !== false)) {
            $body = $removedlink;
        }

        $removedlink = preg_replace("/\[url\=" . preg_quote($url, '/') . "\](.*?)\[\/url\]/ism", '', $body);
        if (is_string($removedlink) && (($removedlink === "") || (strpos($body, $removedlink) !== false))) {
            $body = $removedlink;
        }
    }

    // Add the page information to the bottom
    return $body . $footer;
}
/**
 * Backward-compatible wrapper that forwards all arguments to
 * \Friendica\Protocol\Feed::consume() and returns nothing.
 *
 * @param mixed $xml      Passed through unchanged.
 * @param array $importer Passed through unchanged.
 * @param array $contact  Passed through unchanged.
 * @param mixed $hub      Passed through by reference.
 *
 * @deprecated since 2020.06
 * @see \Friendica\Protocol\Feed::consume
 */
function consume_feed($xml, array $importer, array $contact, &$hub)
{
    \Friendica\Protocol\Feed::consume($xml, $importer, $contact, $hub);
}