git.mxchange.org Git - friendica.git/commitdiff
Purify HTML before trying to parse wild URLs. This way it should at least parse.
author Friendika <info@friendika.com>
Tue, 5 Jul 2011 06:02:04 +0000 (23:02 -0700)
committer Friendika <info@friendika.com>
Tue, 5 Jul 2011 06:02:04 +0000 (23:02 -0700)
mod/parse_url.php

index 15a6aced0e0fda8fcff6d765c17aea3cf94baa00..ec28d74111817db7fcc540afcb3a769a5c8ee729 100644 (file)
@@ -1,6 +1,7 @@
 <?php
 
 require_once('library/HTML5/Parser.php');
+require_once('library/HTMLPurifier.auto.php');
 
 function parse_url_content(&$a) {
 
@@ -31,16 +32,25 @@ function parse_url_content(&$a) {
                killme();
        }
 
+       logger('parse_url: data: ' . $s, LOGGER_DATA);
 
        if(! $s) {
                echo sprintf($template,$url,$url,'');
                killme();
        }
 
+       $config = HTMLPurifier_Config::createDefault();
+       $config->set('Cache.DefinitionImpl', null);
+
+       $purifier = new HTMLPurifier($config);
+       $s = $purifier->purify($s);
+
        $dom = @HTML5_Parser::parse($s);
 
-       if(! $dom)
-               return $ret;
+       if(! $dom) {
+               echo sprintf($template,$url,$url,'');
+               killme();
+       }
 
        $items = $dom->getElementsByTagName('title');
 
@@ -51,7 +61,6 @@ function parse_url_content(&$a) {
                }
        }
 
-
        $divs = $dom->getElementsByTagName('div');
        if($divs) {
                foreach($divs as $div) {
@@ -94,6 +103,6 @@ function parse_url_content(&$a) {
                $text = '<br />' . $text;
        }
 
-       echo sprintf($template,$url,$title,$text);
+       echo sprintf($template,$url,($title) ? $title : $url,$text);
        killme();
 }