git.mxchange.org Git - friendica-addons.git/commitdiff
[twitter] Rework twitter_expand_entities
author: Hypolite Petovan <hypolite@mrpetovan.com>
Mon, 30 Mar 2020 06:01:41 +0000 (02:01 -0400)
committer: Hypolite Petovan <hypolite@mrpetovan.com>
Mon, 30 Mar 2020 14:49:06 +0000 (10:49 -0400)
- Uses Twitter-provided entity indices to avoid blanket string replacement and unwarranted tag search

twitter/twitter.php

index da8c3b3691dd86f0ee8171281ac69edb6eb56944..3822850396b34e8b3e2352bdc0fb24f3fbf6f090 100644 (file)
@@ -1202,164 +1202,142 @@ function twitter_fetchuser(App $a, $uid, $screen_name = "", $user_id = "")
        return $contact_id;
 }
 
-function twitter_expand_entities(App $a, $body, $item, $picture)
+/**
+ * Replaces Twitter entities with Friendica-friendly links.
+ *
+ * The Twitter API gives indices for each entity, which allows for fine-grained replacement.
+ *
+ * First, we need to collect everything that needs to be replaced, what we will replace it with, and the start index.
+ * Then we sort the indices decreasingly, and we replace from the end of the body to the start in order for the next
+ * index to be correct even after the last replacement.
+ *
+ * @param string   $body
+ * @param stdClass $status
+ * @param string   $picture
+ * @return array
+ * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+ */
+function twitter_expand_entities($body, stdClass $status, $picture)
 {
        $plain = $body;
 
-       $tags_arr = [];
+       $tags = [];
 
-       foreach ($item->entities->hashtags AS $hashtag) {
-               $url = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $hashtag->text . ']' . $hashtag->text . '[/url]';
-               $tags_arr['#' . $hashtag->text] = $url;
-               $body = str_replace('#' . $hashtag->text, $url, $body);
-       }
+       $replacementList = [];
 
-       foreach ($item->entities->user_mentions AS $mention) {
-               $url = '@[url=https://twitter.com/' . rawurlencode($mention->screen_name) . ']' . $mention->screen_name . '[/url]';
-               $tags_arr['@' . $mention->screen_name] = $url;
-               $body = str_replace('@' . $mention->screen_name, $url, $body);
-       }
+       foreach ($status->entities->hashtags AS $hashtag) {
+               $replace = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $hashtag->text . ']' . $hashtag->text . '[/url]';
+               $tags['#' . $hashtag->text] = $replace;
 
-       if (isset($item->entities->urls)) {
-               $type = '';
-               $footerurl = '';
-               $footerlink = '';
-               $footer = '';
+               $replacementList[$hashtag->indices[0]] = [
+                       'replace' => $replace,
+                       'length' => $hashtag->indices[1] - $hashtag->indices[0],
+               ];
+       }
 
-               foreach ($item->entities->urls as $url) {
-                       $plain = str_replace($url->url, '', $plain);
+       foreach ($status->entities->user_mentions AS $mention) {
+               $replace = '@[url=https://twitter.com/' . rawurlencode($mention->screen_name) . ']' . $mention->screen_name . '[/url]';
+               $tags['@' . $mention->screen_name] = $replace;
 
-                       if ($url->url && $url->expanded_url && $url->display_url) {
-                               // Quote tweet, we just remove the quoted tweet URL from the body, the share block will be added later.
-                               if (!empty($item->quoted_status) && isset($item->quoted_status_id_str)
-                                       && substr($url->expanded_url, -strlen($item->quoted_status_id_str)) == $item->quoted_status_id_str ) {
-                                       $body = str_replace($url->url, '', $body);
-                                       continue;
-                               }
-
-                               $expanded_url = $url->expanded_url;
+               $replacementList[$mention->indices[0]] = [
+                       'replace' => $replace,
+                       'length' => $mention->indices[1] - $mention->indices[0],
+               ];
+       }
 
-                               $final_url = Network::finalUrl($url->expanded_url);
+       // This URL if set will be used to add an attachment at the bottom of the post
+       $attachmentUrl = '';
 
-                               $oembed_data = OEmbed::fetchURL($final_url);
+       foreach ($status->entities->urls ?? [] as $url) {
+               $plain = str_replace($url->url, '', $plain);
 
-                               if (empty($oembed_data) || empty($oembed_data->type)) {
-                                       continue;
-                               }
+               if ($url->url && $url->expanded_url && $url->display_url) {
 
-                               // Quickfix: Workaround for URL with '[' and ']' in it
-                               if (strpos($expanded_url, '[') || strpos($expanded_url, ']')) {
-                                       $expanded_url = $url->url;
-                               }
+                       // Quote tweet, we just remove the quoted tweet URL from the body, the share block will be added later.
+                       if (!empty($status->quoted_status) && isset($status->quoted_status_id_str)
+                               && substr($url->expanded_url, -strlen($status->quoted_status_id_str)) == $status->quoted_status_id_str
+                       ) {
+                               $replacementList[$url->indices[0]] = [
+                                       'replace' => '',
+                                       'length' => $url->indices[1] - $url->indices[0],
+                               ];
+                               continue;
+                       }
 
-                               if ($type == '') {
-                                       $type = $oembed_data->type;
-                               }
+                       $expanded_url = $url->expanded_url;
 
-                               if ($oembed_data->type == 'video') {
-                                       $type = $oembed_data->type;
-                                       $footerurl = $expanded_url;
-                                       $footerlink = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
+                       $final_url = Network::finalUrl($url->expanded_url);
 
-                                       $body = str_replace($url->url, $footerlink, $body);
-                               } elseif (($oembed_data->type == 'photo') && isset($oembed_data->url)) {
-                                       $body = str_replace($url->url, '[url=' . $expanded_url . '][img]' . $oembed_data->url . '[/img][/url]', $body);
-                               } elseif ($oembed_data->type != 'link') {
-                                       $body = str_replace($url->url, '[url=' . $expanded_url . ']' . $url->display_url . '[/url]', $body);
-                               } else {
-                                       $img_str = Network::fetchUrl($final_url, true, 4);
+                       $oembed_data = OEmbed::fetchURL($final_url);
 
-                                       $tempfile = tempnam(get_temppath(), 'cache');
-                                       file_put_contents($tempfile, $img_str);
+                       if (empty($oembed_data) || empty($oembed_data->type)) {
+                               continue;
+                       }
 
-                                       // See http://php.net/manual/en/function.exif-imagetype.php#79283
-                                       if (filesize($tempfile) > 11) {
-                                               $mime = image_type_to_mime_type(exif_imagetype($tempfile));
-                                       } else {
-                                               $mime = false;
-                                       }
+                       // Quickfix: Workaround for URL with '[' and ']' in it
+                       if (strpos($expanded_url, '[') || strpos($expanded_url, ']')) {
+                               $expanded_url = $url->url;
+                       }
 
-                                       unlink($tempfile);
+                       if ($oembed_data->type == 'video') {
+                               $attachmentUrl = $expanded_url;
+                               $replace = '';
+                       } elseif (($oembed_data->type == 'photo') && isset($oembed_data->url)) {
+                               $replace = '[url=' . $expanded_url . '][img]' . $oembed_data->url . '[/img][/url]';
+                       } elseif ($oembed_data->type != 'link') {
+                               $replace = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
+                       } else {
+                               $img_str = Network::fetchUrl($final_url, true, 4);
 
-                                       if (substr($mime, 0, 6) == 'image/') {
-                                               $type = 'photo';
-                                               $body = str_replace($url->url, '[img]' . $final_url . '[/img]', $body);
-                                       } else {
-                                               $type = $oembed_data->type;
-                                               $footerurl = $expanded_url;
-                                               $footerlink = '[url=' . $expanded_url . ']' . $url->display_url . '[/url]';
+                               $tempfile = tempnam(get_temppath(), 'cache');
+                               file_put_contents($tempfile, $img_str);
 
-                                               $body = str_replace($url->url, $footerlink, $body);
-                                       }
+                               // See http://php.net/manual/en/function.exif-imagetype.php#79283
+                               if (filesize($tempfile) > 11) {
+                                       $mime = image_type_to_mime_type(exif_imagetype($tempfile));
+                               } else {
+                                       $mime = false;
                                }
-                       }
-               }
-
-               // Footer will be taken care of with a share block in the case of a quote
-               if (empty($item->quoted_status)) {
-                       if ($footerurl != '') {
-                               $footer = add_page_info($footerurl, false, $picture);
-                       }
 
-                       if (($footerlink != '') && (trim($footer) != '')) {
-                               $removedlink = trim(str_replace($footerlink, '', $body));
+                               unlink($tempfile);
 
-                               if (($removedlink == '') || strstr($body, $removedlink)) {
-                                       $body = $removedlink;
+                               if (substr($mime, 0, 6) == 'image/') {
+                                       $replace = '[img]' . $final_url . '[/img]';
+                               } else {
+                                       $attachmentUrl = $expanded_url;
+                                       $replace = '';
                                }
-
-                               $body .= $footer;
                        }
 
-                       if ($footer == '' && $picture != '') {
-                               $body .= "\n\n[img]" . $picture . "[/img]\n";
-                       } elseif ($footer == '' && $picture == '') {
-                               $body = add_page_info_to_body($body);
-                       }
+                       $replacementList[$url->indices[0]] = [
+                               'replace' => $replace,
+                               'length' => $url->indices[1] - $url->indices[0],
+                       ];
                }
        }
 
-       // it seems as if the entities aren't always covering all mentions. So the rest will be checked here
-       $tags = BBCode::getTags($body);
+       krsort($replacementList);
 
-       if (count($tags)) {
-               foreach ($tags as $tag) {
-                       if (strstr(trim($tag), ' ')) {
-                               continue;
-                       }
-
-                       if (strpos($tag, '#') === 0) {
-                               if (strpos($tag, '[url=')) {
-                                       continue;
-                               }
-
-                               // don't link tags that are already embedded in links
-                               if (preg_match('/\[(.*?)' . preg_quote($tag, '/') . '(.*?)\]/', $body)) {
-                                       continue;
-                               }
-                               if (preg_match('/\[(.*?)\]\((.*?)' . preg_quote($tag, '/') . '(.*?)\)/', $body)) {
-                                       continue;
-                               }
+       foreach ($replacementList as $startIndex => $parameters) {
+               $body = Strings::substringReplace($body, $parameters['replace'], $startIndex, $parameters['length']);
+       }
 
-                               $basetag = str_replace('_', ' ', substr($tag, 1));
-                               $url = '#[url=' . DI::baseUrl()->get() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]';
-                               $body = str_replace($tag, $url, $body);
-                               $tags_arr['#' . $basetag] = $url;
-                       } elseif (strpos($tag, '@') === 0) {
-                               if (strpos($tag, '[url=')) {
-                                       continue;
-                               }
+       // Footer will be taken care of with a share block in the case of a quote
+       if (empty($status->quoted_status)) {
+               $footer = '';
+               if ($attachmentUrl) {
+                       $footer = add_page_info($attachmentUrl, false, $picture);
+               }
 
-                               $basetag = substr($tag, 1);
-                               $url = '@[url=https://twitter.com/' . rawurlencode($basetag) . ']' . $basetag . '[/url]';
-                               $body = str_replace($tag, $url, $body);
-                               $tags_arr['@' . $basetag] = $url;
-                       }
+               if (trim($footer)) {
+                       $body .= $footer;
+               } elseif ($picture) {
+                       $body .= "\n\n[img]" . $picture . "[/img]\n";
+               } else {
+                       $body = add_page_info_to_body($body);
                }
        }
 
-       $tags = implode($tags_arr, ',');
-
        return ['body' => $body, 'tags' => $tags, 'plain' => $plain];
 }
 
@@ -1554,9 +1532,9 @@ function twitter_createpost(App $a, $uid, $post, array $self, $create_user, $onl
        // Search for media links
        $picture = twitter_media_entities($post, $postarray);
 
-       $converted = twitter_expand_entities($a, $postarray['body'], $post, $picture);
-       $postarray['body'] = $converted["body"];
-       $postarray['tag'] = $converted["tags"];
+       $converted = twitter_expand_entities($postarray['body'], $post, $picture);
+       $postarray['body'] = $converted['body'];
+       $postarray['tag'] = implode($converted['tags'], ',');
        $postarray['created'] = DateTimeFormat::utc($post->created_at);
        $postarray['edited'] = DateTimeFormat::utc($post->created_at);