git.mxchange.org Git - friendica.git/commitdiff
Move HTML purification to own method in Content\Text\HTML
author Hypolite Petovan <hypolite@mrpetovan.com>
Sun, 14 Mar 2021 17:40:32 +0000 (13:40 -0400)
committer Hypolite Petovan <hypolite@mrpetovan.com>
Sun, 14 Mar 2021 18:19:33 +0000 (14:19 -0400)
src/Content/Text/BBCode.php
src/Content/Text/HTML.php
src/Module/Debug/Babel.php

index 1a930088107b698df9afcab031d4fcba7cf0366d..c6095cd4194ec4fad6f5e8d1de0e253c13db8df6 100644 (file)
@@ -29,12 +29,10 @@ use Friendica\Content\Item;
 use Friendica\Content\OEmbed;
 use Friendica\Content\PageInfo;
 use Friendica\Content\Smilies;
-use Friendica\Content\Text\HTMLPurifier_URIScheme_cid;
 use Friendica\Core\Hook;
 use Friendica\Core\Logger;
 use Friendica\Core\Protocol;
 use Friendica\Core\Renderer;
-use Friendica\Core\System;
 use Friendica\DI;
 use Friendica\Model\Contact;
 use Friendica\Model\Event;
@@ -1877,28 +1875,16 @@ class BBCode
                        $text
                );
 
-               \HTMLPurifier_URISchemeRegistry::instance()->register('cid', new HTMLPurifier_URIScheme_cid());
-
-               $config = \HTMLPurifier_HTML5Config::createDefault();
-               $config->set('HTML.Doctype', 'HTML5');
-               $config->set('HTML.SafeIframe', true);
-               $config->set('URI.SafeIframeRegexp', '%^(?:
-                       https://www.youtube.com/embed/
-                       |
-                       https://player.vimeo.com/video/
-                       |
-                       ' . DI::baseUrl() . '/oembed/ # Has to change with the source in Content\Oembed::iframe
-               )%xi');
-               $config->set('Attr.AllowedRel', [
-                       'noreferrer' => true,
-                       'noopener' => true,
-               ]);
-               $config->set('Attr.AllowedFrameTargets', [
-                       '_blank' => true,
-               ]);
-
-               $HTMLPurifier = new \HTMLPurifier($config);
-               $text = $HTMLPurifier->purify($text);
+               // Default iframe allowed domains/path
+               $allowedIframeDomains = [
+                       DI::baseUrl()->getHostname()
+                       . (DI::baseUrl()->getUrlPath() ? '/' . DI::baseUrl()->getUrlPath() : '')
+                       . '/oembed/', # The path part has to change with the source in Content\Oembed::iframe
+                       'www.youtube.com/embed/',
+                       'player.vimeo.com/video/',
+               ];
+
+               $text = HTML::purify($text, $allowedIframeDomains);
 
                return $text;
        }
index 975be8b1ffadadcb0d64d8785c01daf41e92da26..c77b84db8ab47a485f7219cdea899c2dfc673d11 100644 (file)
@@ -961,4 +961,63 @@ class HTML
        {
                return str_replace('&amp;', '&', $s);
        }
+
+       /**
+        * Clean an HTML text for potentially harmful code
+        *
+        * The text is run through HTML Purifier with an HTML5 configuration: the cid: URI scheme is registered,
+        * only the "noreferrer" and "noopener" rel values and the "_blank" frame target are allowed, and empty
+        * elements are removed.
+        *
+        * An iframe source is only accepted when it is an https:// URL matching one of the provided entries or
+        * one of its sub-domains. Blank entries are ignored; without any usable entry no iframe source pattern
+        * is configured at all.
+        *
+        * @param string $text                 HTML to purify
+        * @param array  $allowedIframeDomains List of allowed iframe source domains (optionally followed by a path) without the scheme
+        * @return string The purified HTML
+        */
+       public static function purify(string $text, array $allowedIframeDomains = []): string
+       {
+               // Allows cid: URL scheme
+               \HTMLPurifier_URISchemeRegistry::instance()->register('cid', new HTMLPurifier_URIScheme_cid());
+
+               $config = \HTMLPurifier_HTML5Config::createDefault();
+               $config->set('HTML.Doctype', 'HTML5');
+
+               // Used to remove iframe with src attribute filtered out
+               $config->set('AutoFormat.RemoveEmpty', true);
+
+               $config->set('HTML.SafeIframe', true);
+
+               $iframeSourcePatterns = [];
+               foreach ($allowedIframeDomains as $domain) {
+                       // Unescaped whitespace is ignored by the extended (x) pattern below, so it is stripped along with the slashes
+                       $domain = trim($domain, "/ \t\r\n");
+
+                       // A blank alternative would reduce to the sub-domain prefix alone and accept any host
+                       // written with a trailing dot (e.g. https://evil.tld./)
+                       if ($domain === '') {
+                               continue;
+                       }
+
+                       // Allow the domain and all its eventual sub-domains
+                       $iframeSourcePatterns[] = '(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)*' . preg_quote($domain, '%');
+               }
+
+               // With no usable entry the directive keeps its default value instead of receiving an empty alternation.
+               // NOTE(review): HTML Purifier is expected to reject every iframe source in that case - confirm against the bundled version
+               if ($iframeSourcePatterns !== []) {
+                       $config->set('URI.SafeIframeRegexp',
+                               '%^https://(?:
+                                       ' . implode('|', $iframeSourcePatterns) . '
+                               )
+                               (?:/|$) # Prevents bogus domains like youtube.com.fake.tld
+                               %xi'
+                       );
+               }
+
+               $config->set('Attr.AllowedRel', [
+                       'noreferrer' => true,
+                       'noopener' => true,
+               ]);
+               $config->set('Attr.AllowedFrameTargets', [
+                       '_blank' => true,
+               ]);
+
+               /* Uncomment to debug HTMLPurifier behavior
+               $config->set('Core.CollectErrors', true);
+               $config->set('Core.MaintainLineNumbers', true);
+               */
+
+               $HTMLPurifier = new \HTMLPurifier($config);
+
+               $text = $HTMLPurifier->purify($text);
+
+               /** @var \HTMLPurifier_ErrorCollector $errorCollector */
+               /* Uncomment to debug HTML Purifier behavior
+               $errorCollector = $HTMLPurifier->context->get('ErrorCollector');
+               var_dump($errorCollector->getRaw());
+               */
+
+               return $text;
+       }
 }
index 322b742fbe475d61dae68aa86505ff4a1c1cfc8c..52f6614454f3e8de22a142c7bab93fc8ea100bdb 100644 (file)
@@ -180,9 +180,7 @@ class Babel extends BaseModule
                                                'content' => $html
                                        ];
 
-                                       $config = \HTMLPurifier_Config::createDefault();
-                                       $HTMLPurifier = new \HTMLPurifier($config);
-                                       $purified = $HTMLPurifier->purify($html);
+                                       $purified = Text\HTML::purify($html);
 
                                        $results[] = [
                                                'title'   => DI::l10n()->t('HTML Purified (raw)'),