git.mxchange.org Git - friendica.git/commitdiff
Moving the clean up code to a central place.
author: Michael Vogel <icarus@dabo.de>
Mon, 8 Dec 2014 21:37:49 +0000 (22:37 +0100)
committer: Michael Vogel <icarus@dabo.de>
Mon, 8 Dec 2014 21:37:49 +0000 (22:37 +0100)
include/Scrape.php
library/HTML5/Parser.php

index ad9e030a39b83872500b7dc0c029ed65b1051299..99784af336120cf08e0a76ac306947712309de3e 100644 (file)
@@ -263,11 +263,6 @@ function scrape_feed($url) {
        }
 
        try {
-               // Cleanup invalid HTML
-               $doc = new DOMDocument();
-               @$doc->loadHTML($s);
-                $s = $doc->saveHTML();
-
                $dom = HTML5_Parser::parse($s);
        } catch (DOMException $e) {
                logger('scrape_feed: parse error: ' . $e);
index 5f9ca560e594c2732ac4c0157ef8b61d420e14e0..c7faf875ad326304c44f65aa3f64ff63b07924c6 100644 (file)
@@ -17,6 +17,12 @@ class HTML5_Parser
      * @return Parsed HTML as DOMDocument
      */
     static public function parse($text, $builder = null) {
+
+       // Cleanup invalid HTML
+       $doc = new DOMDocument();
+       @$doc->loadHTML($text);
+       $text = $doc->saveHTML();
+
         $tokenizer = new HTML5_Tokenizer($text, $builder);
         $tokenizer->parse();
         return $tokenizer->save();