git.mxchange.org Git - friendica.git/blobdiff - src/Util/ParseUrl.php
Replace legacy file/category handling
[friendica.git] / src / Util / ParseUrl.php
index 1a81a256ca36ae5a82a92c5b72f5ce334a25c28f..15186b57374bd9246444dcf484f575d6cfec7eaf 100644 (file)
@@ -26,6 +26,7 @@ use DOMXPath;
 use Friendica\Content\OEmbed;
 use Friendica\Core\Hook;
 use Friendica\Core\Logger;
+use Friendica\Database\Database;
 use Friendica\Database\DBA;
 use Friendica\DI;
 
@@ -91,7 +92,7 @@ class ParseUrl
                                'oembed' => $do_oembed, 'content' => serialize($data),
                                'created' => DateTimeFormat::utcNow()
                        ],
-                       true
+                       Database::INSERT_UPDATE
                );
 
                return $data;
@@ -160,16 +161,22 @@ class ParseUrl
                        return $siteinfo;
                }
 
-               $curlResult = DI::httpRequest()->get($url, false, ['content_length' => 1000000]);
+               $curlResult = DI::httpRequest()->get($url);
                if (!$curlResult->isSuccess()) {
                        return $siteinfo;
                }
 
+               // If the file is too large then exit
+               if (($curlResult->getInfo()['download_content_length'] ?? 0) > 1000000) {
+                       return $siteinfo;
+               }
+
                // If it isn't a HTML file then exit
                if (($curlResult->getContentType() != '') && !strstr(strtolower($curlResult->getContentType()), 'html')) {
                        return $siteinfo;
                }
 
+               $header = $curlResult->getHeader();
                $body = $curlResult->getBody();
 
                if ($do_oembed) {
@@ -198,7 +205,7 @@ class ParseUrl
                $charset = '';
                // Look for a charset, first in headers
                // Expected form: Content-Type: text/html; charset=ISO-8859-4
-               if (preg_match('/charset=([a-z0-9-_.\/]+)/i', $curlResult->getContentType(), $matches)) {
+               if (preg_match('/charset=([a-z0-9-_.\/]+)/i', $header, $matches)) {
                        $charset = trim(trim(trim(array_pop($matches)), ';,'));
                }