git.mxchange.org Git - friendica.git/commitdiff
Cleanup the HTML before parsing it.
author: Michael Vogel <icarus@dabo.de>
Mon, 8 Dec 2014 13:26:44 +0000 (14:26 +0100)
committer: Michael Vogel <icarus@dabo.de>
Mon, 8 Dec 2014 13:26:44 +0000 (14:26 +0100)
include/Scrape.php

index 99784af336120cf08e0a76ac306947712309de3e..ad9e030a39b83872500b7dc0c029ed65b1051299 100644 (file)
@@ -263,6 +263,11 @@ function scrape_feed($url) {
        }
 
        try {
+               // Cleanup invalid HTML
+               $doc = new DOMDocument();
+               @$doc->loadHTML($s);
+                $s = $doc->saveHTML();
+
                $dom = HTML5_Parser::parse($s);
        } catch (DOMException $e) {
                logger('scrape_feed: parse error: ' . $e);