]> git.mxchange.org Git - friendica.git/blobdiff - src/Util/ParseUrl.php
Merge branch 'develop' of https://github.com/friendica/friendica into develop
[friendica.git] / src / Util / ParseUrl.php
index b897c54ee7dd58da0a94d3a9ce92aaccb8d4e5c8..24089b9cbd2a92bb33c22fe4e41ee3b501987cff 100644 (file)
@@ -135,23 +135,23 @@ class ParseUrl
                $siteinfo['url'] = $url;
                $siteinfo['type'] = 'link';
 
-               $data = Network::curl($url);
-               if (!$data['success']) {
+               $curlResult = Network::curl($url);
+               if (!$curlResult->isSuccess()) {
                        return $siteinfo;
                }
 
                // If the file is too large then exit
-               if ($data['info']['download_content_length'] > 1000000) {
+               if (defaults($curlResult->getInfo(), 'download_content_length', 0) > 1000000) {
                        return $siteinfo;
                }
 
                // If it isn't a HTML file then exit
-               if (($data['info']['content_type'] != '') && !strstr(strtolower($data['info']['content_type']), 'html')) {
+               if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
                        return $siteinfo;
                }
 
-               $header = $data['header'];
-               $body = $data['body'];
+               $header = $curlResult->getHeader();
+               $body = $curlResult->getBody();
 
                if ($do_oembed) {
                        $oembed_data = OEmbed::fetchURL($url);
@@ -161,7 +161,8 @@ class ParseUrl
                                        $siteinfo['type'] = $oembed_data->type;
                                }
 
-                               if (($oembed_data->type == 'link') && ($siteinfo['type'] != 'photo')) {
+                               // See https://github.com/friendica/friendica/pull/5763#discussion_r217913178
+                               if ($siteinfo['type'] != 'photo') {
                                        if (isset($oembed_data->title)) {
                                                $siteinfo['title'] = trim($oembed_data->title);
                                        }
@@ -248,52 +249,52 @@ class ParseUrl
                                }
                        }
 
-                       if (!empty($meta_tag['content'])) {
-                               $meta_tag['content'] = trim(html_entity_decode($meta_tag['content'], ENT_QUOTES, 'UTF-8'));
-
-                               switch (strtolower($meta_tag['name'])) {
-                                       case 'fulltitle':
-                                               $siteinfo['title'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'description':
-                                               $siteinfo['text'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'thumbnail':
-                                               $siteinfo['image'] = $meta_tag['content'];
-                                               break;
-                                       case 'twitter:image':
-                                               $siteinfo['image'] = $meta_tag['content'];
-                                               break;
-                                       case 'twitter:image:src':
-                                               $siteinfo['image'] = $meta_tag['content'];
-                                               break;
-                                       case 'twitter:card':
-                                               if (($siteinfo['type'] == '') || ($meta_tag['content'] == 'photo')) {
-                                                       $siteinfo['type'] = $meta_tag['content'];
-                                               }
-                                               break;
-                                       case 'twitter:description':
-                                               $siteinfo['text'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'twitter:title':
-                                               $siteinfo['title'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'dc.title':
-                                               $siteinfo['title'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'dc.description':
-                                               $siteinfo['text'] = trim($meta_tag['content']);
-                                               break;
-                                       case 'keywords':
-                                               $keywords = explode(',', $meta_tag['content']);
-                                               break;
-                                       case 'news_keywords':
-                                               $keywords = explode(',', $meta_tag['content']);
-                                               break;
-                               }
+                       if (empty($meta_tag['content'])) {
+                               continue;
                        }
-                       if ($siteinfo['type'] == 'summary') {
-                               $siteinfo['type'] = 'link';
+
+                       $meta_tag['content'] = trim(html_entity_decode($meta_tag['content'], ENT_QUOTES, 'UTF-8'));
+
+                       switch (strtolower($meta_tag['name'])) {
+                               case 'fulltitle':
+                                       $siteinfo['title'] = trim($meta_tag['content']);
+                                       break;
+                               case 'description':
+                                       $siteinfo['text'] = trim($meta_tag['content']);
+                                       break;
+                               case 'thumbnail':
+                                       $siteinfo['image'] = $meta_tag['content'];
+                                       break;
+                               case 'twitter:image':
+                                       $siteinfo['image'] = $meta_tag['content'];
+                                       break;
+                               case 'twitter:image:src':
+                                       $siteinfo['image'] = $meta_tag['content'];
+                                       break;
+                               case 'twitter:card':
+                                       // Detect photo pages
+                                       if ($meta_tag['content'] == 'summary_large_image') {
+                                               $siteinfo['type'] = 'photo';
+                                       }
+                                       break;
+                               case 'twitter:description':
+                                       $siteinfo['text'] = trim($meta_tag['content']);
+                                       break;
+                               case 'twitter:title':
+                                       $siteinfo['title'] = trim($meta_tag['content']);
+                                       break;
+                               case 'dc.title':
+                                       $siteinfo['title'] = trim($meta_tag['content']);
+                                       break;
+                               case 'dc.description':
+                                       $siteinfo['text'] = trim($meta_tag['content']);
+                                       break;
+                               case 'keywords':
+                                       $keywords = explode(',', $meta_tag['content']);
+                                       break;
+                               case 'news_keywords':
+                                       $keywords = explode(',', $meta_tag['content']);
+                                       break;
                        }
                }
 
@@ -332,7 +333,12 @@ class ParseUrl
                        }
                }
 
-               if ((@$siteinfo['image'] == '') && !$no_guessing) {
+               // A photo type requires an image and no descriptive text; otherwise fall back to 'link'
+               if ((empty($siteinfo['image']) || !empty($siteinfo['text'])) && ($siteinfo['type'] == 'photo')) {
+                       $siteinfo['type'] = 'link';
+               }
+
+               if (empty($siteinfo['image']) && !$no_guessing) {
                        $list = $xpath->query('//img[@src]');
                        foreach ($list as $node) {
                                $img_tag = [];
@@ -481,21 +487,23 @@ class ParseUrl
 
                $complete = $schemearr["scheme"]."://".$schemearr["host"];
 
-               if (@$schemearr["port"] != "") {
+               if (!empty($schemearr["port"])) {
                        $complete .= ":".$schemearr["port"];
                }
 
-               if (strpos($urlarr["path"], "/") !== 0) {
-                       $complete .= "/";
-               }
+               if (!empty($urlarr["path"])) {
+                       if (strpos($urlarr["path"], "/") !== 0) {
+                               $complete .= "/";
+                       }
 
-               $complete .= $urlarr["path"];
+                       $complete .= $urlarr["path"];
+               }
 
-               if (@$urlarr["query"] != "") {
+               if (!empty($urlarr["query"])) {
                        $complete .= "?".$urlarr["query"];
                }
 
-               if (@$urlarr["fragment"] != "") {
+               if (!empty($urlarr["fragment"])) {
                        $complete .= "#".$urlarr["fragment"];
                }