From 620ee6be0bdb4e706abfa2172d3dfb3e2d035e25 Mon Sep 17 00:00:00 2001
From: Michael Vogel <icarus@dabo.de>
Date: Mon, 8 Dec 2014 14:26:44 +0100
Subject: [PATCH] Clean up the HTML before parsing it.

---
 include/Scrape.php | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/Scrape.php b/include/Scrape.php
index 99784af336..ad9e030a39 100644
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -263,6 +263,11 @@ function scrape_feed($url) {
 	}
 
 	try {
+		// Clean up invalid HTML
+		$doc = new DOMDocument();
+		@$doc->loadHTML($s);
+                $s = $doc->saveHTML();
+
 		$dom = HTML5_Parser::parse($s);
 	} catch (DOMException $e) {
 		logger('scrape_feed: parse error: ' . $e);
-- 
2.39.5