Avoid HTML parsing of an empty body

author Michael <heluecht@pirati.ca>

Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)

committer Michael <heluecht@pirati.ca>

Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)
author Michael <heluecht@pirati.ca>
Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)
committer Michael <heluecht@pirati.ca>
Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)
diff --git a/src/Content/OEmbed.php b/src/Content/OEmbed.php

index c3ce44da00fb33711255695a6858ad8cc718cde7..b0b018c1fb1c18121b16a67cb38ac910d1e96d1f 100644 (file)
--- a/src/Content/OEmbed.php
+++ b/src/Content/OEmbed.php
@@ -98,7 +98,7 @@ class OEmbed
                         if (!in_array($ext, $noexts)) {
                                 // try oembed autodiscovery
                                 $html_text = DI::httpRequest()->fetch($embedurl, 15, 'text/*');
-                               if ($html_text) {
+                               if (!empty($html_text)) {
                                         $dom = new DOMDocument();
                                         if (@$dom->loadHTML($html_text)) {
                                                 $xpath = new DOMXPath($dom);
diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php

index d3633894853279bcee84255b495d40ba27d43fcc..dfc6939dc2bce32eaa7ae87b98dbe1dd4579dde3 100644 (file)
--- a/src/Content/Text/BBCode.php
+++ b/src/Content/Text/BBCode.php
@@ -1178,7 +1178,11 @@ class BBCode
  
                                 // if its not a picture then look if its a page that contains a picture link
                                 $body = DI::httpRequest()->fetch($match[1]);
-
+                               if (empty($body)) {
+                                       DI::cache()->set($cache_key, $text);
+                                       return $text;
+                               }
+               
                                 $doc = new DOMDocument();
                                 @$doc->loadHTML($body);
                                 $xpath = new DOMXPath($doc);
@@ -1214,8 +1218,6 @@ class BBCode
  
         private static function cleanPictureLinksCallback($match)
         {
-               $a = DI::app();
-
                 // When the picture link is the own photo path then we can avoid fetching the link
                 $own_photo_url = preg_quote(Strings::normaliseLink(DI::baseUrl()->get()) . '/photos/');
                 if (preg_match('|' . $own_photo_url . '.*?/image/|', Strings::normaliseLink($match[1]))) {
@@ -1257,6 +1259,10 @@ class BBCode
  
                         // if its not a picture then look if its a page that contains a picture link
                         $body = DI::httpRequest()->fetch($match[1]);
+                       if (empty($body)) {
+                               DI::cache()->set($cache_key, $text);
+                               return $text;
+                       }
  
                         $doc = new DOMDocument();
                         @$doc->loadHTML($body);
diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php

index 6eb02bfbe5d881ef73ebb1db4705cd41678d0234..51515137e56ddd00f3d576e93f4998b7652bf025 100644 (file)
--- a/src/Content/Text/HTML.php
+++ b/src/Content/Text/HTML.php
@@ -605,6 +605,10 @@ class HTML
                 // Collecting all links
                 $urls = self::collectURLs($message);
  
+               if (empty($message)) {
+                       return '';
+               }
+
                 @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD);
  
                 self::tagToBBCode($doc, 'html', [], '', '');
diff --git a/src/Model/GServer.php b/src/Model/GServer.php

index 6b018c30317213314674afed4bda637180370474..6a1a363e34f96580d991d74b6a8294e2d63715fd 100644 (file)
--- a/src/Model/GServer.php
+++ b/src/Model/GServer.php
@@ -1475,6 +1475,10 @@ class GServer
          */
         private static function analyseRootBody($curlResult, array $serverdata, string $url)
         {
+               if (empty($curlResult->getBody())) {
+                       return $serverdata;
+               }
+
                 $doc = new DOMDocument();
                 @$doc->loadHTML($curlResult->getBody());
                 $xpath = new DOMXPath($doc);
diff --git a/src/Network/Probe.php b/src/Network/Probe.php

index 6359d8607a4699ea1b50f0fdf52d9c12150f8b55..5f3c516820f544f10fe2bfd6e71fcd68797fd822 100644 (file)
--- a/src/Network/Probe.php
+++ b/src/Network/Probe.php
@@ -439,6 +439,9 @@ class Probe
                 }
  
                 $body = $curlResult->getBody();
+               if (empty($body)) {
+                       return false;
+               }
  
                 $doc = new DOMDocument();
                 @$doc->loadHTML($body);
@@ -1274,7 +1277,7 @@ class Probe
                         return [];
                 }
                 $content = $curlResult->getBody();
-               if (!$content) {
+               if (empty($content)) {
                         return [];
                 }
  
@@ -1610,7 +1613,7 @@ class Probe
         private static function pumpioProfileData($profile_link)
         {
                 $curlResult = DI::httpRequest()->get($profile_link);
-               if (!$curlResult->isSuccess()) {
+               if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                         return [];
                 }
  
diff --git a/src/Protocol/OStatus.php b/src/Protocol/OStatus.php

index a6d7b79c223fbf0d8050437ec7c12c19c3edc022..13e1d4de2751092eb43c3d0bd0d3d08082e44ae7 100644 (file)
--- a/src/Protocol/OStatus.php
+++ b/src/Protocol/OStatus.php
@@ -735,7 +735,7 @@ class OStatus
  
                 $curlResult = DI::httpRequest()->get($conversation, ['accept_content' => 'application/atom+xml, text/html']);
  
-               if (!$curlResult->isSuccess()) {
+               if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                         return;
                 }
  
@@ -928,7 +928,7 @@ class OStatus
                 $stored = false;
                 $curlResult = DI::httpRequest()->get($related, ['accept_content' => 'application/atom+xml, text/html']);
  
-               if (!$curlResult->isSuccess()) {
+               if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                         return;
                 }
  
diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php

index 42c9b5101f41699b9399724859edec30ad55bc63..70ae2310f9b808b897f4bd23c4b4723a755f59e2 100644 (file)
--- a/src/Util/ParseUrl.php
+++ b/src/Util/ParseUrl.php
@@ -214,7 +214,7 @@ class ParseUrl
                 }
  
                 $curlResult = DI::httpRequest()->get($url);
-               if (!$curlResult->isSuccess()) {
+               if (!$curlResult->isSuccess() || empty($curlResult->getBody())) {
                         return $siteinfo;
                 }
author	Michael <heluecht@pirati.ca>
	Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)
committer	Michael <heluecht@pirati.ca>
	Sat, 10 Apr 2021 05:46:19 +0000 (05:46 +0000)
src/Content/OEmbed.php		patch | blob | history
src/Content/Text/BBCode.php		patch | blob | history
src/Content/Text/HTML.php		patch | blob | history
src/Model/GServer.php		patch | blob | history
src/Network/Probe.php		patch | blob | history
src/Protocol/OStatus.php		patch | blob | history
src/Util/ParseUrl.php		patch | blob | history