include/items.php

   1 <?php
   2 /**
   3  * @copyright Copyright (C) 2020, Friendica
   4  *
   5  * @license GNU AGPL version 3 or any later version
   6  *
   7  * This program is free software: you can redistribute it and/or modify
   8  * it under the terms of the GNU Affero General Public License as
   9  * published by the Free Software Foundation, either version 3 of the
  10  * License, or (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU Affero General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Affero General Public License
  18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  19  *
  20  */
  21
  22 use Friendica\Core\Hook;
  23 use Friendica\Core\Logger;
  24 use Friendica\Core\Protocol;
  25 use Friendica\Core\Renderer;
  26 use Friendica\Core\Session;
  27 use Friendica\Database\DBA;
  28 use Friendica\DI;
  29 use Friendica\Model\Item;
  30 use Friendica\Protocol\DFRN;
  31 use Friendica\Protocol\Feed;
  32 use Friendica\Protocol\OStatus;
  33 use Friendica\Util\Network;
  34 use Friendica\Util\ParseUrl;
  35 use Friendica\Util\Strings;
  36
  37 require_once __DIR__ . '/../mod/share.php';
  38
  39 function add_page_info_data(array $data, $no_photos = false)
  40 {
  41         Hook::callAll('page_info_data', $data);
  42
  43         if (empty($data['type'])) {
  44                 return '';
  45         }
  46
  47         // It maybe is a rich content, but if it does have everything that a link has,
  48         // then treat it that way
  49         if (($data["type"] == "rich") && is_string($data["title"]) &&
  50                 is_string($data["text"]) && !empty($data["images"])) {
  51                 $data["type"] = "link";
  52         }
  53
  54         $data["title"] = $data["title"] ?? '';
  55
  56         if ((($data["type"] != "link") && ($data["type"] != "video") && ($data["type"] != "photo")) || ($data["title"] == $data["url"])) {
  57                 return "";
  58         }
  59
  60         if ($no_photos && ($data["type"] == "photo")) {
  61                 return "";
  62         }
  63
  64         // Escape some bad characters
  65         $data["url"] = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["url"], ENT_QUOTES, 'UTF-8', false));
  66         $data["title"] = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["title"], ENT_QUOTES, 'UTF-8', false));
  67
  68         $text = "[attachment type='".$data["type"]."'";
  69
  70         if (empty($data["text"])) {
  71                 $data["text"] = $data["title"];
  72         }
  73
  74         if (empty($data["text"])) {
  75                 $data["text"] = $data["url"];
  76         }
  77
  78         if (!empty($data["url"])) {
  79                 $text .= " url='".$data["url"]."'";
  80         }
  81
  82         if (!empty($data["title"])) {
  83                 $text .= " title='".$data["title"]."'";
  84         }
  85
  86         // Only embedd a picture link when it seems to be a valid picture ("width" is set)
  87         if (!empty($data["images"]) && !empty($data["images"][0]["width"])) {
  88                 $preview = str_replace(["[", "]"], ["&#91;", "&#93;"], htmlentities($data["images"][0]["src"], ENT_QUOTES, 'UTF-8', false));
  89                 // if the preview picture is larger than 500 pixels then show it in a larger mode
  90                 // But only, if the picture isn't higher than large (To prevent huge posts)
  91                 if (!DI::config()->get('system', 'always_show_preview') && ($data["images"][0]["width"] >= 500)
  92                         && ($data["images"][0]["width"] >= $data["images"][0]["height"])) {
  93                         $text .= " image='".$preview."'";
  94                 } else {
  95                         $text .= " preview='".$preview."'";
  96                 }
  97         }
  98
  99         $text .= "]".$data["text"]."[/attachment]";
 100
 101         $hashtags = "";
 102         if (isset($data["keywords"]) && count($data["keywords"])) {
 103                 $hashtags = "\n";
 104                 foreach ($data["keywords"] as $keyword) {
 105                         /// @TODO make a positive list of allowed characters
 106                         $hashtag = str_replace([' ', '+', '/', '.', '#', '@', "'", '"', '’', '`', '(', ')', '„', '“'], '', $keyword);
 107                         $hashtags .= "#[url=" . DI::baseUrl() . "/search?tag=" . $hashtag . "]" . $hashtag . "[/url] ";
 108                 }
 109         }
 110
 111         return "\n".$text.$hashtags;
 112 }
 113
 114 function query_page_info($url, $photo = "", $keywords = false, $keyword_denylist = "")
 115 {
 116         $data = ParseUrl::getSiteinfoCached($url, true);
 117
 118         if ($photo != "") {
 119                 $data["images"][0]["src"] = $photo;
 120         }
 121
 122         Logger::log('fetch page info for ' . $url . ' ' . print_r($data, true), Logger::DEBUG);
 123
 124         if (!$keywords && isset($data["keywords"])) {
 125                 unset($data["keywords"]);
 126         }
 127
 128         if (($keyword_denylist != "") && isset($data["keywords"])) {
 129                 $list = explode(", ", $keyword_denylist);
 130
 131                 foreach ($list as $keyword) {
 132                         $keyword = trim($keyword);
 133
 134                         $index = array_search($keyword, $data["keywords"]);
 135                         if ($index !== false) {
 136                                 unset($data["keywords"][$index]);
 137                         }
 138                 }
 139         }
 140
 141         return $data;
 142 }
 143
 144 function get_page_keywords($url, $photo = "", $keywords = false, $keyword_denylist = "")
 145 {
 146         $data = query_page_info($url, $photo, $keywords, $keyword_denylist);
 147         if (empty($data["keywords"]) || !is_array($data["keywords"])) {
 148                 return [];
 149         }
 150
 151         $taglist = [];
 152         foreach ($data['keywords'] as $keyword) {
 153                 $hashtag = str_replace([" ", "+", "/", ".", "#", "'"],
 154                         ["", "", "", "", "", ""], $keyword);
 155
 156                 $taglist[] = $hashtag;
 157         }
 158
 159         return $taglist;
 160 }
 161
 162 function add_page_info($url, $no_photos = false, $photo = "", $keywords = false, $keyword_denylist = "")
 163 {
 164         $data = query_page_info($url, $photo, $keywords, $keyword_denylist);
 165
 166         $text = '';
 167
 168         if (is_array($data)) {
 169                 $text = add_page_info_data($data, $no_photos);
 170         }
 171
 172         return $text;
 173 }
 174
 175 function add_page_info_to_body($body, $texturl = false, $no_photos = false)
 176 {
 177         Logger::log('add_page_info_to_body: fetch page info for body ' . $body, Logger::DEBUG);
 178
 179         $URLSearchString = "^\[\]";
 180
 181         // Fix for Mastodon where the mentions are in a different format
 182         $body = preg_replace("/\[url\=([$URLSearchString]*)\]([#!@])(.*?)\[\/url\]/ism",
 183                 '$2[url=$1]$3[/url]', $body);
 184
 185         // Adding these spaces is a quick hack due to my problems with regular expressions :)
 186         preg_match("/[^!#@]\[url\]([$URLSearchString]*)\[\/url\]/ism", " " . $body, $matches);
 187
 188         if (!$matches) {
 189                 preg_match("/[^!#@]\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", " " . $body, $matches);
 190         }
 191
 192         // Convert urls without bbcode elements
 193         if (!$matches && $texturl) {
 194                 preg_match("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", " ".$body, $matches);
 195
 196                 // Yeah, a hack. I really hate regular expressions :)
 197                 if ($matches) {
 198                         $matches[1] = $matches[2];
 199                 }
 200         }
 201
 202         if ($matches) {
 203                 $footer = add_page_info($matches[1], $no_photos);
 204         }
 205
 206         // Remove the link from the body if the link is attached at the end of the post
 207         if (isset($footer) && (trim($footer) != "") && (strpos($footer, $matches[1]))) {
 208                 $removedlink = trim(str_replace($matches[1], "", $body));
 209                 if (($removedlink == "") || strstr($body, $removedlink)) {
 210                         $body = $removedlink;
 211                 }
 212
 213                 $removedlink = preg_replace("/\[url\=" . preg_quote($matches[1], '/') . "\](.*?)\[\/url\]/ism", '', $body);
 214                 if (($removedlink == "") || strstr($body, $removedlink)) {
 215                         $body = $removedlink;
 216                 }
 217         }
 218
 219         // Add the page information to the bottom
 220         if (isset($footer) && (trim($footer) != "")) {
 221                 $body .= $footer;
 222         }
 223
 224         return $body;
 225 }
 226
 227 /**
 228  * @deprecated since 2020.06
 229  * @see \Friendica\Protocol\Feed::consume
 230  */
 231 function consume_feed($xml, array $importer, array $contact, &$hub)
 232 {
 233         \Friendica\Protocol\Feed::consume($xml, $importer, $contact, $hub);
 234 }