]> git.mxchange.org Git - friendica.git/blobdiff - include/Scrape.php
Clean up the HTML before parsing it.
[friendica.git] / include / Scrape.php
index 99784af336120cf08e0a76ac306947712309de3e..ad9e030a39b83872500b7dc0c029ed65b1051299 100644 (file)
@@ -263,6 +263,11 @@ function scrape_feed($url) {
        }
 
        try {
+               // Cleanup invalid HTML
+               $doc = new DOMDocument();
+               @$doc->loadHTML($s);
+                $s = $doc->saveHTML();
+
                $dom = HTML5_Parser::parse($s);
        } catch (DOMException $e) {
                logger('scrape_feed: parse error: ' . $e);