git.mxchange.org Git - friendica.git/commitdiff
parse_url: Remove warnings caused by invalid charsets
author Michael Vogel <icarus@dabo.de>
Thu, 24 Jul 2014 20:50:56 +0000 (22:50 +0200)
committer Michael Vogel <icarus@dabo.de>
Thu, 24 Jul 2014 20:50:56 +0000 (22:50 +0200)
mod/parse_url.php

index 7ab71a2fc520d7f7ae68a07711a7cc35fe947633..7f10dce3496cad36b82b137cc87a703b34746db9 100644 (file)
@@ -100,7 +100,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
        // Fetch the first mentioned charset. Can be in body or header
        $charset = "";
        if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches))
-               $charset = trim(array_pop($matches));
+               $charset = trim(trim(trim(array_pop($matches)), ';,'));
 
        if ($charset == "")
                $charset = "utf-8";
@@ -112,7 +112,12 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
        else
                $body = $header;
 
-       $body = mb_convert_encoding($body, "UTF-8", $charset);
+       if (($charset != '') AND (strtoupper($charset) != "UTF-8")) {
+               logger("parseurl_getsiteinfo: detected charset ".$charset, LOGGER_DEBUG);
+               //$body = mb_convert_encoding($body, "UTF-8", $charset);
+               $body = iconv($charset, "UTF-8//TRANSLIT", $body);
+       }
+
        $body = mb_convert_encoding($body, 'HTML-ENTITIES', "UTF-8");
 
        $doc = new DOMDocument();