git.mxchange.org Git - friendica.git/commitdiff
Add style and script tag escaping when parsing charset in Util\ParseUrl
author: Hypolite Petovan <hypolite@mrpetovan.com>
Fri, 25 Sep 2020 10:55:52 +0000 (06:55 -0400)
committer: Hypolite Petovan <hypolite@mrpetovan.com>
Fri, 25 Sep 2020 10:55:52 +0000 (06:55 -0400)
- Address https://github.com/friendica/friendica/issues/9251#issuecomment-698086677

src/Util/ParseUrl.php

index bb3ebbc10b219863d23dbda496200fd97d2fee7d..ce2a0ea9669591242651081d6c7d24c64e369fcf 100644 (file)
@@ -212,9 +212,13 @@ class ParseUrl
                // Expected forms:
                // - <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
                // - <meta charset="utf-8">
-               if (preg_match('/charset=["\']?([^\'"]*?)[\'"]/', $body, $matches)) {
-                       $charset = trim(trim(trim(array_pop($matches)), ';,'));
-               }
+               // We escape <style> and <script> tags since they can contain irrelevant charset information
+               // (see https://github.com/friendica/friendica/issues/9251#issuecomment-698636806)
+               Strings::performWithEscapedBlocks($body, '#<(?:style|script).*?</(?:style|script)>#ism', function ($body) use (&$charset) {
+                       if (preg_match('/charset=["\']?([^\',"]*?)[\'"]/', $body, $matches)) {
+                               $charset = trim(trim(trim(array_pop($matches)), ';,'));
+                       }
+               });
 
                if ($charset && strtoupper($charset) != 'UTF-8') {
                        // See https://github.com/friendica/friendica/issues/5470#issuecomment-418351211