4 * @file include/plaintext.php
7 use \Friendica\ParseUrl;
9 require_once("include/Photo.php");
10 require_once("include/bbcode.php");
11 require_once("include/html2plain.php");
12 require_once("include/network.php");
/**
 * @brief Fetches attachment data that were generated the old way
 *
 * The old format wraps the attachment into a [class=type-link],
 * [class=type-video] or [class=type-photo] element which may contain an
 * [img], a [bookmark] and a [quote] element. When the body contains several
 * of these elements, values found in a later one overwrite earlier ones.
 *
 * @param string $body Message body
 *
 * @return array Empty when no old style attachment was found, otherwise
 * 'type' -> Message type ("link", "video", "photo")
 * 'text' -> Text before the shared message (the whole remaining text when
 *           the attachment is located at the very start of the body)
 * 'after' -> Text after the shared message
 * 'image' -> Image of the message (only for large, non-portrait pictures)
 * 'preview' -> Image of the message (all other pictures)
 * 'url' -> Url to the attached message
 * 'title' -> Title of the attachment
 * 'description' -> Description of the attachment
 */
function get_old_attachment_data($body) {
	$post = array();

	// Simplify image codes: drop the size information from [img=WxH]
	$body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);

	if (!preg_match_all("(\[class=(.*?)\](.*?)\[\/class\])ism", $body, $attached, PREG_SET_ORDER)) {
		return $post;
	}

	// Body of a character class that matches everything except square brackets
	$URLSearchString = "^\[\]";

	foreach ($attached as $data) {
		if (!in_array($data[1], array("type-link", "type-video", "type-photo"), true)) {
			continue;
		}

		// Strip the leading "type-"
		$post["type"] = substr($data[1], 5);

		$pos = strpos($body, $data[0]);
		if ($pos > 0) {
			$post["text"] = trim(substr($body, 0, $pos));
			$post["after"] = trim(substr($body, $pos + strlen($data[0])));
		} else {
			// The attachment opens the body: everything else counts as text
			$post["text"] = trim(str_replace($data[0], "", $body));
		}

		$attacheddata = $data[2];

		if (preg_match("/\[img\]([$URLSearchString]*)\[\/img\]/ism", $attacheddata, $matches)) {
			// NOTE(review): the result is presumably shaped like getimagesize()
			// (index 0 = width, index 1 = height) - confirm against get_photo_info().
			$picturedata = get_photo_info($matches[1]);

			// The lookup may fail, so never index a non-array result
			$is_large = is_array($picturedata)
				&& isset($picturedata[0], $picturedata[1])
				&& ($picturedata[0] >= 500)
				&& ($picturedata[0] >= $picturedata[1]);

			if ($is_large) {
				$post["image"] = $matches[1];
			} else {
				$post["preview"] = $matches[1];
			}
		}

		if (preg_match("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism", $attacheddata, $matches)) {
			$post["url"] = $matches[1];
			$post["title"] = $matches[2];
		}

		// Search for description
		if (preg_match("/\[quote\](.*?)\[\/quote\]/ism", $attacheddata, $matches)) {
			$post["description"] = $matches[1];
		}
	}

	return $post;
}
/**
 * @brief Fetches attachment data that were generated with the "attachment" element
 *
 * Falls back to get_old_attachment_data() when the body does not contain an
 * [attachment] element. Attribute values may be enclosed in single or in
 * double quotes; when both variants are present the double quoted one wins.
 *
 * @param string $body Message body
 *
 * @return array Empty when the attachment has no type or an unsupported one, otherwise
 * 'type' -> Message type ("link", "audio", "photo", "video")
 * 'text' -> Text before the shared message
 * 'after' -> Text after the shared message
 * 'image' -> Preview image of the message
 * 'preview' -> Content of the "preview" attribute
 * 'url' -> Url to the attached message
 * 'title' -> Title of the attachment
 * 'description' -> Description of the attachment (content of the element)
 */
function get_attachment_data($body) {
	$data = array();

	if (!preg_match("/(.*)\[attachment(.*?)\](.*?)\[\/attachment\](.*)/ism", $body, $match)) {
		return get_old_attachment_data($body);
	}

	$attributes = $match[2];

	// Returns the value of one attribute or "" when it is missing or empty.
	// The match result is checked before it is read, so a missing attribute
	// does not touch an undefined array offset.
	$attribute = function ($name) use ($attributes) {
		$value = "";
		foreach (array("'", '"') as $quote) {
			if (preg_match("/".$name."=".$quote."(.*?)".$quote."/ism", $attributes, $matches) && ($matches[1] != "")) {
				$value = $matches[1];
			}
		}
		return $value;
	};

	$data["text"] = trim($match[1]);

	$type = strtolower($attribute("type"));
	if (!in_array($type, array("link", "audio", "photo", "video"), true)) {
		return array();
	}
	$data["type"] = $type;

	$url = $attribute("url");
	if ($url != "") {
		$data["url"] = html_entity_decode($url, ENT_QUOTES, 'UTF-8');
	}

	$title = $attribute("title");
	if ($title != "") {
		$title = bbcode(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), false, false, true);
		$title = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
		// Escape square brackets so that they are not read as BBCode later on
		$title = str_replace(array("[", "]"), array("&#91;", "&#93;"), $title);
		$data["title"] = $title;
	}

	$image = $attribute("image");
	if ($image != "") {
		$data["image"] = html_entity_decode($image, ENT_QUOTES, 'UTF-8');
	}

	$preview = $attribute("preview");
	if ($preview != "") {
		$data["preview"] = html_entity_decode($preview, ENT_QUOTES, 'UTF-8');
	}

	$data["description"] = trim($match[3]);

	$data["after"] = trim($match[4]);

	return $data;
}
/**
 * @brief Fetches the attachment data of a message body
 *
 * Uses get_attachment_data() first. When this yields no type, the body is
 * searched for pictures and for a single plain [url] element instead.
 *
 * @param string $body Message body
 * @param array $item Optional item array. Its "plink" value is used as the
 *                    link target for posts that contain several pictures.
 *
 * @return array
 * 'type' -> Message type ("link", "video", "photo", "text", ...)
 * 'text' -> Text of the message
 * 'url' -> Url that belongs to the message (if any)
 * 'image' -> Image of the message (if any)
 * 'preview' -> Preview picture of a linked image (if any)
 * plus everything else that get_attachment_data() returned
 */
function get_attached_data($body, $item = array()) {
	$post = get_attachment_data($body);

	if (isset($post["type"])) {
		// An attachment was found. Videos additionally get a preview image.
		if (isset($post["url"]) && ($post["type"] == "video")) {
			$data = ParseUrl::getSiteinfoCached($post["url"], true);

			if (isset($data["images"][0])) {
				$post["image"] = $data["images"][0]["src"];
			}
		}

		return $post;
	}

	// if nothing is found, it maybe having an image.
	$URLSearchString = "^\[\]";

	// The url is only set when a permalink is known, so isset($post["url"])
	// stays false otherwise.
	$has_plink = is_array($item) && isset($item["plink"]) && ($item["plink"] != "");

	if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
		if (count($pictures) == 1) {
			// Checking, if the link goes to a picture
			$data = ParseUrl::getSiteinfoCached($pictures[0][1], true);
			$is_photo = isset($data["type"]) && ($data["type"] == "photo");

			// Workaround:
			// Sometimes photo posts to the own album are not detected at the start.
			// So we seem to cannot use the cache for these cases. That's strange.
			if (!$is_photo && (strpos($pictures[0][1], "/photos/") !== false)) {
				$data = ParseUrl::getSiteinfo($pictures[0][1], true);
				$is_photo = isset($data["type"]) && ($data["type"] == "photo");
			}

			if ($is_photo) {
				$post["type"] = "photo";
				if (isset($data["images"][0])) {
					$post["image"] = $data["images"][0]["src"];
					$post["url"] = $data["url"];
				} else {
					$post["image"] = $data["url"];
				}

				$post["preview"] = $pictures[0][2];
				$post["text"] = str_replace($pictures[0][0], "", $body);
			} else {
				// The site info does not say "photo", so look at the file itself
				$imgdata = get_photo_info($pictures[0][1]);
				if (is_array($imgdata) && isset($imgdata["mime"]) && (substr($imgdata["mime"], 0, 6) == "image/")) {
					$post["type"] = "photo";
					$post["image"] = $pictures[0][1];
					$post["preview"] = $pictures[0][2];
					$post["text"] = str_replace($pictures[0][0], "", $body);
				}
			}
		} else {
			// Several linked pictures: link to the post, show the first preview
			$post["type"] = "link";
			if ($has_plink) {
				$post["url"] = $item["plink"];
			}
			$post["image"] = $pictures[0][2];
			$post["text"] = $body;
		}
	} elseif (preg_match_all("(\[img\]([$URLSearchString]*)\[\/img\])ism", $body, $pictures, PREG_SET_ORDER)) {
		if (count($pictures) == 1) {
			$post["type"] = "photo";
			$post["image"] = $pictures[0][1];
			$post["text"] = str_replace($pictures[0][0], "", $body);
		} else {
			// Several pictures: link to the post, show the first picture
			$post["type"] = "link";
			if ($has_plink) {
				$post["url"] = $item["plink"];
			}
			$post["image"] = $pictures[0][1];
			$post["text"] = $body;
		}
	}

	// Exactly one plain link turns the post into a text post with that url.
	// This intentionally runs after the picture detection and overrides it.
	if (preg_match_all("(\[url\]([$URLSearchString]*)\[\/url\])ism", $body, $links, PREG_SET_ORDER)) {
		if (count($links) == 1) {
			$post["type"] = "text";
			$post["url"] = $links[0][1];
			$post["text"] = $body;
		}
	}

	if (!isset($post["type"])) {
		$post["type"] = "text";
		$post["text"] = trim($body);
	}

	return $post;
}
/**
 * @brief Shortens a message to the given number of characters
 *
 * The message is taken line by line as long as the result fits into the
 * limit. If not even the first line fits, or if the message is a reshare
 * (the first line starts with the recycle symbol) and the second line does
 * not fit, the text is cut and "..." is appended.
 *
 * @param string $msg The message
 * @param int $limit Maximum number of characters (UTF-8) of the result
 * @param bool $twitter Currently unused
 *
 * @return string The shortened message, never longer than $limit characters
 */
function shortenmsg($msg, $limit, $twitter = false) {
	/// @TODO
	/// For Twitter URLs aren't shortened, but they have to be calculated as if.

	// Callers subtract link lengths from the limit, so it can drop to zero
	// or below. Nothing fits in that case.
	if ($limit <= 0) {
		return "";
	}

	$ellipsis = "...";
	$recycle = html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8');

	$lines = explode("\n", $msg);
	$msg = "";

	foreach ($lines as $row => $line) {
		$candidate = trim($msg."\n".$line);

		if (iconv_strlen($candidate, "UTF-8") <= $limit) {
			$msg = $candidate;
			continue;
		}

		// Is the new message empty by now or is it a reshared message?
		$is_reshare = ($row == 1) && (substr($msg, 0, strlen($recycle)) === $recycle);
		if (($msg != "") && !$is_reshare) {
			break;
		}

		if ($limit > strlen($ellipsis)) {
			$msg = iconv_substr($candidate, 0, $limit - strlen($ellipsis), "UTF-8").$ellipsis;
		} elseif ($limit == strlen($ellipsis)) {
			$msg = $ellipsis;
		} else {
			// No room for the ellipsis: plain cut, so the limit is still kept
			$msg = iconv_substr($candidate, 0, $limit, "UTF-8");
		}
	}

	return $msg;
}
/**
 * @brief Convert a message into plaintext for connectors to other networks
 *
 * @param App $a The application class (not used by this function)
 * @param array $b The message array that is about to be posted
 *                 ("body", "title", "plink" and "uid" are read)
 * @param int $limit The maximum number of characters when posting to that network
 *                   (0 means that there is no limit)
 * @param bool $includedlinks Has an attached link to be included into the message?
 * @param int $htmlmode This triggers the behaviour of the bbcode conversion
 * @param string $target_network Name of the network where the post should go to.
 *
 * @return array The attachment data of the message (see get_attached_data())
 *               where 'text' holds the converted and shortened message
 */
function plaintext($a, $b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "") {

	// Remove the links from hash tags and mentions, only "#tag"/"@name" is kept
	$URLSearchString = "^\[\]";
	$body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]);

	// Add an URL element if the text contains a raw link
	$body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);

	// Remove the abstract
	$body = remove_abstract($body);

	// At first look at data that is attached via "type-..." stuff
	// This will hopefully replaced with a dedicated bbcode later
	// The item is handed over as well since it carries the permalink.
	$post = get_attached_data($body, $b);

	$title = isset($b["title"]) ? $b["title"] : "";
	$text = isset($post["text"]) ? $post["text"] : "";

	if (($title != "") && ($text != "")) {
		$post["text"] = trim($title."\n\n".$text);
	} elseif ($title != "") {
		$post["text"] = trim($title);
	}

	$abstract = "";

	// Fetch the abstract from the given target network
	if ($target_network != "") {
		$default_abstract = fetch_abstract($b["body"]);
		$abstract = fetch_abstract($b["body"], $target_network);

		// If we post to a network with no limit we only fetch
		// an abstract exactly for this network
		if (($limit == 0) && ($abstract == $default_abstract)) {
			$abstract = "";
		}
	} else {
		// Try to guess the correct target network
		switch ($htmlmode) {
			case 8:
				$abstract = fetch_abstract($b["body"], NETWORK_TWITTER);
				break;
			case 7:
				$abstract = fetch_abstract($b["body"], NETWORK_STATUSNET);
				break;
			case 6:
				$abstract = fetch_abstract($b["body"], NETWORK_APPNET);
				break;
			default:
				// We don't know the exact target.
				// We fetch an abstract since there is a posting limit.
				if ($limit > 0) {
					$abstract = fetch_abstract($b["body"]);
				}
		}
	}

	if ($abstract != "") {
		$post["text"] = $abstract;

		if ($post["type"] == "text") {
			$post["type"] = "link";
			$post["url"] = $b["plink"];
		}
	}

	// Neither value is guaranteed to be present
	$text = isset($post["text"]) ? $post["text"] : "";
	$after = isset($post["after"]) ? $post["after"] : "";

	$html = bbcode($text.$after, false, false, $htmlmode);
	$msg = html2plain($html, 0, true);
	$msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));

	$link = "";
	if ($includedlinks) {
		if (in_array($post["type"], array("link", "text", "video"), true)) {
			$link = isset($post["url"]) ? $post["url"] : "";
		} elseif ($post["type"] == "photo") {
			$link = isset($post["image"]) ? $post["image"] : "";
		}

		if (($msg == "") && isset($post["title"])) {
			$msg = trim($post["title"]);
		}

		if (($msg == "") && isset($post["description"])) {
			$msg = trim($post["description"]);
		}

		// If the link is already contained in the post, then it needn't to be added again
		// But: if the link is beyond the limit, then it has to be added.
		$pos = ($link != "") ? strpos($msg, $link) : false;
		if ($pos !== false) {
			// Will the text be shortened in the link?
			// Or is the link the last item in the post?
			if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) {
				$msg = trim(str_replace($link, "", $msg));
			} elseif (($limit == 0) || ($pos < $limit)) {
				// The limit has to be increased since it will be shortened - but not now
				// Only do it with Twitter (htmlmode = 8)
				if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) {
					$limit = $limit - 23 + strlen($link);
				}

				// The link stays inside the text, so it must not be added again
				$link = "";

				if ($post["type"] == "text") {
					unset($post["url"]);
				}
			}
		}
	}

	if ($limit > 0) {
		// Reduce multiple spaces
		// When posted to a network with limited space, we try to gain space where possible
		// (a quantifier is used so that the code does not depend on a literal double space)
		$msg = preg_replace("/ {2,}/", " ", $msg);

		// Twitter is using its own limiter, so we always assume that shortened links will have this length
		if (iconv_strlen($link, "UTF-8") > 0) {
			$limit = $limit - 23;
		}

		if (iconv_strlen($msg, "UTF-8") > $limit) {

			// The message gets cut, so the link has to point to the original post
			if (($post["type"] == "text") && isset($post["url"])) {
				$post["url"] = $b["plink"];
			} elseif (!isset($post["url"])) {
				// A link is added that wasn't accounted for until now
				$limit = $limit - 23;
				$post["url"] = $b["plink"];
			} elseif (strpos($b["body"], "[share") !== false) {
				$post["url"] = $b["plink"];
			} elseif (get_pconfig($b["uid"], "system", "no_intelligent_shortening")) {
				$post["url"] = $b["plink"];
			}

			$msg = shortenmsg($msg, $limit);
		}
	}

	$post["text"] = trim($msg);

	return $post;
}