From: Mikael Nordfeldth
Date: Mon, 10 Jul 2017 11:46:07 +0000 (+0200)
Subject: Updating HTMLPurifier to 4.9.3
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=a4a6a8469eb720a41d1cddbdbdbc624eb12a1313;p=quix0rs-gnu-social.git
Updating HTMLPurifier to 4.9.3
Source: https://htmlpurifier.org/download
Release date: 2017-06-19
---
diff --git a/extlib/HTMLPurifier/HTMLPurifier.includes.php b/extlib/HTMLPurifier/HTMLPurifier.includes.php
index fdb58c2d37..e8bce5c850 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.includes.php
@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
- * @version 4.7.0
+ * @version 4.9.3
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -137,6 +137,8 @@ require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
+require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
+require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
@@ -175,6 +177,8 @@ require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
+require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
+require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -225,5 +229,6 @@ require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
+require 'HTMLPurifier/URIScheme/tel.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';
diff --git a/extlib/HTMLPurifier/HTMLPurifier.php b/extlib/HTMLPurifier/HTMLPurifier.php
index c6041bc113..b4605ebc6e 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.php
@@ -19,7 +19,7 @@
*/
/*
- HTML Purifier 4.7.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
- public $version = '4.7.0';
+ public $version = '4.9.3';
/**
* Constant with version of HTML Purifier.
*/
- const VERSION = '4.7.0';
+ const VERSION = '4.9.3';
/**
* Global configuration object.
@@ -104,7 +104,7 @@ class HTMLPurifier
/**
* Initializes the purifier.
*
- * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
+ * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
* for all instances of the purifier, if omitted, a default
* configuration is supplied (which can be overridden on a
* per-use basis).
diff --git a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
index 9dea6d1ed5..a3261f8a32 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
@@ -131,6 +131,8 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
@@ -169,6 +171,8 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -219,5 +223,6 @@ require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Arborize.php b/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
index 9e6617be5d..d2e9d22a20 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
@@ -19,8 +19,8 @@ class HTMLPurifier_Arborize
if ($token instanceof HTMLPurifier_Token_End) {
$token->start = null; // [MUT]
$r = array_pop($stack);
- assert($r->name === $token->name);
- assert(empty($token->attr));
+ //assert($r->name === $token->name);
+ //assert(empty($token->attr));
$r->endCol = $token->col;
$r->endLine = $token->line;
$r->endArmor = $token->armor;
@@ -32,7 +32,7 @@ class HTMLPurifier_Arborize
$stack[] = $node;
}
}
- assert(count($stack) == 1);
+ //assert(count($stack) == 1);
return $stack[0];
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php b/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
index 4f6c2e39a2..c7b17cf144 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
@@ -21,6 +21,11 @@ class HTMLPurifier_AttrCollections
* @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules)
+ {
+ $this->doConstruct($attr_types, $modules);
+ }
+
+ public function doConstruct($attr_types, $modules)
{
// load extensions from the modules
foreach ($modules as $module) {
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
index 5ac06522b9..739646fa7c 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
@@ -86,7 +86,13 @@ abstract class HTMLPurifier_AttrDef
*/
protected function mungeRgb($string)
{
- return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
+ $p = '\s*(\d+(\.\d+)?([%]?))\s*';
+
+ if (preg_match('/(rgba|hsla)\(/', $string)) {
+ return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
+ }
+
+ return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
}
/**
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
index 02c1641fb2..ad2cb90ad1 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
@@ -25,15 +25,42 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
+ $allow_duplicates = $config->get("CSS.AllowDuplicates");
- // we're going to break the spec and explode by semicolons.
- // This is because semicolon rarely appears in escaped form
- // Doing this is generally flaky but fast
- // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
- // for details
- $declarations = explode(';', $css);
+ // According to the CSS2.1 spec, the places where a
+ // non-delimiting semicolon can appear are in strings
+ // escape sequences. So here is some dumb hack to
+ // handle quotes.
+ $len = strlen($css);
+ $accum = "";
+ $declarations = array();
+ $quoted = false;
+ for ($i = 0; $i < $len; $i++) {
+ $c = strcspn($css, ";'\"", $i);
+ $accum .= substr($css, $i, $c);
+ $i += $c;
+ if ($i == $len) break;
+ $d = $css[$i];
+ if ($quoted) {
+ $accum .= $d;
+ if ($d == $quoted) {
+ $quoted = false;
+ }
+ } else {
+ if ($d == ";") {
+ $declarations[] = $accum;
+ $accum = "";
+ } else {
+ $accum .= $d;
+ $quoted = $d;
+ }
+ }
+ }
+ if ($accum != "") $declarations[] = $accum;
+
$propvalues = array();
+ $new_declarations = '';
/**
* Name of the current CSS property being validated.
@@ -83,7 +110,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
if ($result === false) {
continue;
}
- $propvalues[$property] = $result;
+ if ($allow_duplicates) {
+ $new_declarations .= "$property:$result;";
+ } else {
+ $propvalues[$property] = $result;
+ }
}
$context->destroy('CurrentCSSProperty');
@@ -92,7 +123,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
- $new_declarations = '';
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
index 16d2a6b98c..d7287a00c2 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
@@ -6,6 +6,16 @@
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
+ /**
+ * @type HTMLPurifier_AttrDef_CSS_AlphaValue
+ */
+ protected $alpha;
+
+ public function __construct()
+ {
+ $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+ }
+
/**
* @param string $color
* @param HTMLPurifier_Config $config
@@ -29,59 +39,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
return $colors[$lower];
}
- if (strpos($color, 'rgb(') !== false) {
- // rgb literal handling
+ if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) {
return false;
}
- $triad = substr($color, 4, $length - 4 - 1);
- $parts = explode(',', $triad);
- if (count($parts) !== 3) {
+
+ // get used function : rgb, rgba, hsl or hsla
+ $function = $matches[1];
+
+ $parameters_size = 3;
+ $alpha_channel = false;
+ if (substr($function, -1) === 'a') {
+ $parameters_size = 4;
+ $alpha_channel = true;
+ }
+
+ /*
+ * Allowed types for values :
+ * parameter_position => [type => max_value]
+ */
+ $allowed_types = array(
+ 1 => array('percentage' => 100, 'integer' => 255),
+ 2 => array('percentage' => 100, 'integer' => 255),
+ 3 => array('percentage' => 100, 'integer' => 255),
+ );
+ $allow_different_types = false;
+
+ if (strpos($function, 'hsl') !== false) {
+ $allowed_types = array(
+ 1 => array('integer' => 360),
+ 2 => array('percentage' => 100),
+ 3 => array('percentage' => 100),
+ );
+ $allow_different_types = true;
+ }
+
+ $values = trim(str_replace($function, '', $color), ' ()');
+
+ $parts = explode(',', $values);
+ if (count($parts) !== $parameters_size) {
return false;
}
- $type = false; // to ensure that they're all the same type
+
+ $type = false;
$new_parts = array();
+ $i = 0;
+
foreach ($parts as $part) {
+ $i++;
$part = trim($part);
+
if ($part === '') {
return false;
}
- $length = strlen($part);
- if ($part[$length - 1] === '%') {
- // handle percents
- if (!$type) {
- $type = 'percentage';
- } elseif ($type !== 'percentage') {
+
+ // different check for alpha channel
+ if ($alpha_channel === true && $i === count($parts)) {
+ $result = $this->alpha->validate($part, $config, $context);
+
+ if ($result === false) {
return false;
}
- $num = (float)substr($part, 0, $length - 1);
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 100) {
- $num = 100;
- }
- $new_parts[] = "$num%";
+
+ $new_parts[] = (string)$result;
+ continue;
+ }
+
+ if (substr($part, -1) === '%') {
+ $current_type = 'percentage';
} else {
- // handle integers
- if (!$type) {
- $type = 'integer';
- } elseif ($type !== 'integer') {
- return false;
- }
- $num = (int)$part;
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 255) {
- $num = 255;
- }
- $new_parts[] = (string)$num;
+ $current_type = 'integer';
+ }
+
+ if (!array_key_exists($current_type, $allowed_types[$i])) {
+ return false;
+ }
+
+ if (!$type) {
+ $type = $current_type;
+ }
+
+ if ($allow_different_types === false && $type != $current_type) {
+ return false;
+ }
+
+ $max_value = $allowed_types[$i][$current_type];
+
+ if ($current_type == 'integer') {
+ // Return value between range 0 -> $max_value
+ $new_parts[] = (int)max(min($part, $max_value), 0);
+ } elseif ($current_type == 'percentage') {
+ $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
}
}
- $new_triad = implode(',', $new_parts);
- $color = "rgb($new_triad)";
+
+ $new_values = implode(',', $new_parts);
+
+ $color = $function . '(' . $new_values . ')';
} else {
// hexadecimal handling
if ($color[0] === '#') {
@@ -100,6 +155,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
return $color;
}
+
}
// vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
index 86101020dc..74e24c8816 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -130,6 +130,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// . See
// the CSS3 spec for more examples:
//
+ // You can see live samples of these on the Internet:
+ //
// However, most of these fonts have ASCII equivalents:
// for example, 'MS Mincho', and it's considered
// professional to use ASCII font names instead of
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php
index f9434230e2..6617acace5 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php
@@ -33,6 +33,9 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
return false;
}
$uri_string = substr($uri_string, 4);
+ if (strlen($uri_string) == 0) {
+ return false;
+ }
$new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') {
return false;
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php
index 3d86efb44c..4ba45610fe 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php
@@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
// we purposely avoid using regex, hopefully this is faster
- if (ctype_alpha($id)) {
- $result = true;
- } else {
- if (!ctype_alpha(@$id[0])) {
+ if ($config->get('Attr.ID.HTML5') === true) {
+ if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
return false;
}
- // primitive style of regexps, I suppose
- $trim = trim(
- $id,
- 'A..Za..z0..9:-._'
- );
- $result = ($trim === '');
+ } else {
+ if (ctype_alpha($id)) {
+ // OK
+ } else {
+ if (!ctype_alpha(@$id[0])) {
+ return false;
+ }
+ // primitive style of regexps, I suppose
+ $trim = trim(
+ $id,
+ 'A..Za..z0..9:-._'
+ );
+ if ($trim !== '') {
+ return false;
+ }
+ }
}
$regexp = $config->get('Attr.IDBlacklistRegexp');
@@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false;
}
- if (!$this->selector && $result) {
+ if (!$this->selector) {
$id_accumulator->add($id);
}
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it
// valid, or return false.
- return $result ? $id : false;
+ return $id;
}
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php
index e7df800b1e..3b4d186743 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php
@@ -76,24 +76,33 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// fairly well supported.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
+ // Based off of RFC 1738, but amended so that
+ // as per RFC 3696, the top label need only not be all numeric.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
- $domainlabel = "$an($and*$an)?";
- // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
- $toplabel = "$a($and*$an)?";
+ $domainlabel = "$an(?:$and*$an)?";
+ // AMENDED as per RFC 3696
+ // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ // side condition: not all numeric
+ $toplabel = "$an(?:$and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
- if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
- return $string;
+ if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) {
+ if (!ctype_digit($matches[1])) {
+ return $string;
+ }
}
+ // PHP 5.3 and later support this functionality natively
+ if (function_exists('idn_to_ascii')) {
+ $string = idn_to_ascii($string);
+
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
-
- if ($config->get('Core.EnableIDNA')) {
+ } elseif ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
@@ -114,13 +123,14 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
}
}
$string = implode('.', $new_parts);
- if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
- return $string;
- }
} catch (Exception $e) {
// XXX error reporting
}
}
+ // Try again
+ if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
+ return $string;
+ }
return false;
}
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php
index 7df6cb3e1b..235ebb34b6 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php
@@ -32,8 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
- // truncate if the alt is too long
- $attr['alt'] = substr(basename($attr['src']), 0, 40);
+ $attr['alt'] = basename($attr['src']);
} else {
$attr['alt'] = $alt;
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoopener.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoopener.php
new file mode 100644
index 0000000000..1db3c6c09e
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/TargetNoopener.php
@@ -0,0 +1,37 @@
+get('CSS.MaxImgLength');
+ $this->info['min-width'] =
+ $this->info['max-width'] =
+ $this->info['min-height'] =
+ $this->info['max-height'] =
$this->info['width'] =
$this->info['height'] =
$max === null ?
@@ -370,6 +374,19 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
);
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
+ $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
+ array(
+ new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
+ new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
+ ));
+
+ $this->info['border-top-left-radius'] =
+ $this->info['border-top-right-radius'] =
+ $this->info['border-bottom-right-radius'] =
+ $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
+ // TODO: support SLASH syntax
+ $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
+
}
/**
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php
index 891b9f6f5b..4fc70e0efa 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/List.php
@@ -38,13 +38,19 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
return false;
}
+ // if li is not allowed, delete parent node
+ if (!isset($config->getHTMLDefinition()->info['li'])) {
+ trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
+ return false;
+ }
+
// the new set of children
$result = array();
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
- $current_li = false;
+ $current_li = null;
foreach ($children as $node) {
if (!empty($node->is_whitespace)) {
@@ -65,7 +71,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
// to handle non-list elements; non-list elements should
// not be appended to an existing li; only li created
// for non-list. This distinction is not currently made.
- if ($current_li === false) {
+ if ($current_li === null) {
$current_li = new HTMLPurifier_Node_Element('li');
$result[] = $current_li;
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php
index 3e4a0f2182..cb6b3e6cdc 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php
@@ -203,7 +203,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$current_tr_tbody->children[] = $node;
break;
case '#PCDATA':
- assert($node->is_whitespace);
+ //assert($node->is_whitespace);
if ($current_tr_tbody === null) {
$ret[] = $node;
} else {
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Config.php b/extlib/HTMLPurifier/HTMLPurifier/Config.php
index 2b2db0c264..3648364b30 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/Config.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Config.php
@@ -21,7 +21,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
- public $version = '4.7.0';
+ public $version = '4.9.3';
/**
* Whether or not to automatically finalize
@@ -333,7 +333,7 @@ class HTMLPurifier_Config
}
// Raw type might be negative when using the fully optimized form
- // of stdclass, which indicates allow_null == true
+ // of stdClass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php
index bfbb0f92f5..655c0e97ae 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php
@@ -24,11 +24,11 @@ class HTMLPurifier_ConfigSchema
*
* array(
* 'Namespace' => array(
- * 'Directive' => new stdclass(),
+ * 'Directive' => new stdClass(),
* )
* )
*
- * The stdclass may have the following properties:
+ * The stdClass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
@@ -39,8 +39,8 @@ class HTMLPurifier_ConfigSchema
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
- * In certain degenerate cases, stdclass will actually be an integer. In
- * that case, the value is equivalent to an stdclass with the type
+ * In certain degenerate cases, stdClass will actually be an integer. In
+ * that case, the value is equivalent to an stdClass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
@@ -105,7 +105,7 @@ class HTMLPurifier_ConfigSchema
*/
public function add($key, $default, $type, $allow_null)
{
- $obj = new stdclass();
+ $obj = new stdClass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) {
$obj->allow_null = true;
@@ -152,14 +152,14 @@ class HTMLPurifier_ConfigSchema
*/
public function addAlias($key, $new_key)
{
- $obj = new stdclass;
+ $obj = new stdClass;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$key] = $obj;
}
/**
- * Replaces any stdclass that only has the type property with type integer.
+ * Replaces any stdClass that only has the type property with type integer.
*/
public function postProcess()
{
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser
index 1e6ccd2275..371e948f1c 100644
Binary files a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser and b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
new file mode 100644
index 0000000000..735d4b7a10
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
@@ -0,0 +1,10 @@
+Attr.ID.HTML5
+TYPE: bool/null
+DEFAULT: null
+VERSION: 4.8.0
+--DESCRIPTION--
+In HTML5, restrictions on the format of the id attribute have been significantly
+relaxed, such that any string is valid so long as it contains no spaces and
+is at least one character. In lieu of a general HTML5 compatibility flag,
+set this configuration directive to true to use the relaxed rules.
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt
new file mode 100644
index 0000000000..4d054b1f07
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt
@@ -0,0 +1,11 @@
+CSS.AllowDuplicates
+TYPE: bool
+DEFAULT: false
+VERSION: 4.8.0
+--DESCRIPTION--
+
+ By default, HTML Purifier removes duplicate CSS properties,
+ like color:red; color:blue
. If this is set to
+ true, duplicate properties are allowed.
+
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt
index b2b83d9ab6..2e0cc81044 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt
@@ -1,5 +1,5 @@
Cache.SerializerPermissions
-TYPE: int
+TYPE: int/null
VERSION: 4.3.0
DEFAULT: 0755
--DESCRIPTION--
@@ -8,4 +8,9 @@ DEFAULT: 0755
Directory permissions of the files and directories created inside
the DefinitionCache/Serializer or other custom serializer path.
+
+ In HTML Purifier 4.8.0, this also supports NULL
,
+ which means that no chmod'ing or directory creation shall
+ occur.
+
--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
new file mode 100644
index 0000000000..b2b6ab1496
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
@@ -0,0 +1,16 @@
+Core.AggressivelyRemoveScript
+TYPE: bool
+VERSION: 4.9.0
+DEFAULT: true
+--DESCRIPTION--
+
+ This directive enables aggressive pre-filter removal of
+ script tags. This is not necessary for security,
+ but it can help work around a bug in libxml where embedded
+ HTML elements inside script sections cause the parser to
+ choke. To revert to pre-4.9.0 behavior, set this to false.
+ This directive has no effect if %Core.Trusted is true,
+ %Core.RemoveScriptContents is false, or %Core.HiddenElements
+ does not contain script.
+
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
new file mode 100644
index 0000000000..392b436493
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt
@@ -0,0 +1,36 @@
+Core.LegacyEntityDecoder
+TYPE: bool
+VERSION: 4.9.0
+DEFAULT: false
+--DESCRIPTION--
+
+ Prior to HTML Purifier 4.9.0, entities were decoded by performing
+ a global search replace for all entities whose decoded versions
+ did not have special meanings under HTML, and replaced them with
+ their decoded versions. We would match all entities, even if they did
+ not have a trailing semicolon, but only if there weren't any trailing
+ alphanumeric characters.
+
+
+Original | Text | Attribute |
+¥ | ¥ | ¥ |
+¥ | ¥ | ¥ |
+¥a | ¥a | ¥a |
+¥= | ¥= | ¥= |
+
+
+ In HTML Purifier 4.9.0, we changed the behavior of entity parsing
+ to match entities that had missing trailing semicolons in less
+ cases, to more closely match HTML5 parsing behavior:
+
+
+Original | Text | Attribute |
+¥ | ¥ | ¥ |
+¥ | ¥ | ¥ |
+¥a | ¥a | ¥a |
+¥= | ¥= | ¥= |
+
+
+ This flag reverts back to pre-HTML Purifier 4.9.0 behavior.
+
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
new file mode 100644
index 0000000000..dd514c0def
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt
@@ -0,0 +1,10 @@
+--# vim: et sw=4 sts=4
+HTML.TargetNoopener
+TYPE: bool
+VERSION: 4.8.0
+DEFAULT: TRUE
+--DESCRIPTION--
+If enabled, noopener rel attributes are added to links which have
+a target attribute associated with them. This prevents malicious
+destinations from overwriting the original window.
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
new file mode 100644
index 0000000000..cb5a0b0e5e
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
@@ -0,0 +1,9 @@
+HTML.TargetNoreferrer
+TYPE: bool
+VERSION: 4.8.0
+DEFAULT: TRUE
+--DESCRIPTION--
+If enabled, noreferrer rel attributes are added to links which have
+a target attribute associated with them. This prevents malicious
+destinations from overwriting the original window.
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
index 666635a5ff..eb97307e20 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
@@ -8,6 +8,7 @@ array (
'ftp' => true,
'nntp' => true,
'news' => true,
+ 'tel' => true,
)
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
index 728e378cbe..834bc08c0b 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt
@@ -1,5 +1,5 @@
URI.DefaultScheme
-TYPE: string
+TYPE: string/null
DEFAULT: 'http'
--DESCRIPTION--
@@ -7,4 +7,9 @@ DEFAULT: 'http'
Defines through what scheme the output will be served, in order to
select the proper object validator when no scheme information is present.
+
+
+ Starting with HTML Purifier 4.9.0, the default scheme can be null, in
+ which case we reject all URIs which do not have explicit schemes.
+
--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt
index 179f94eb03..58c81dcc44 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt
+++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt
@@ -9,75 +9,75 @@ DEFAULT: NULL
absolute URIs into another URI, usually a URI redirection service.
This directive accepts a URI, formatted with a %s
where
the url-encoded original URI should be inserted (sample:
- https://searx.laquadrature.net/?q=%s
).
-
-
+ http://www.google.com/url?q=%s
).
+
+
Uses for this directive:
-
-
+
+
-
- Prevent PageRank leaks, while being fairly transparent
- to users (you may also want to add some client side JavaScript to
- override the text in the statusbar). Notice:
- Many security experts believe that this form of protection does not deter spam-bots.
+ Prevent PageRank leaks, while being fairly transparent
+ to users (you may also want to add some client side JavaScript to
+ override the text in the statusbar). Notice:
+ Many security experts believe that this form of protection does not deter spam-bots.
-
- Redirect users to a splash page telling them they are leaving your
- website. While this is poor usability practice, it is often mandated
- in corporate environments.
+ Redirect users to a splash page telling them they are leaving your
+ website. While this is poor usability practice, it is often mandated
+ in corporate environments.
-
-
+
+
Prior to HTML Purifier 3.1.1, this directive also enabled the munging
of browsable external resources, which could break things if your redirection
script was a splash page or used meta
tags. To revert to
previous behavior, please use %URI.MungeResources.
-
-
+
+
You may want to also use %URI.MungeSecretKey along with this directive
in order to enforce what URIs your redirector script allows. Open
redirector scripts can be a security risk and negatively affect the
reputation of your domain name.
-
-
+
+
Starting with HTML Purifier 3.1.1, there is also these substitutions:
-
-
+
+
-
- Key |
- Description |
- Example <a href=""> |
-
+
+ Key |
+ Description |
+ Example <a href=""> |
+
-
- %r |
- 1 - The URI embeds a resource (blank) - The URI is merely a link |
- |
-
-
- %n |
- The name of the tag this URI came from |
- a |
-
-
- %m |
- The name of the attribute this URI came from |
- href |
-
-
- %p |
- The name of the CSS property this URI came from, or blank if irrelevant |
- |
-
+
+ %r |
+ 1 - The URI embeds a resource (blank) - The URI is merely a link |
+ |
+
+
+ %n |
+ The name of the tag this URI came from |
+ a |
+
+
+ %m |
+ The name of the attribute this URI came from |
+ href |
+
+
+ %p |
+ The name of the CSS property this URI came from, or blank if irrelevant |
+ |
+
-
-
+
+
Admittedly, these letters are somewhat arbitrary; the only stipulation
was that they couldn't be a through f. r is for resource (I would have preferred
e, but you take what you can get), n is for name, m
was picked because it came after n (and I couldn't use a), p is for
property.
-
- --# vim: et sw=4 sts=4
+
+--# vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php
index 67bb5b1e69..9aa8ff354f 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php
@@ -118,7 +118,7 @@ abstract class HTMLPurifier_DefinitionCache
/**
* Clears all expired (older version or revision) objects from cache
- * @note Be carefuly implementing this method as flush. Flush must
+ * @note Be careful implementing this method as flush. Flush must
* not interfere with other Definition types, and cleanup()
* should not be repeatedly called by userland code.
* @param HTMLPurifier_Config $config
diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php
index ce268d91b4..952e48d470 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php
@@ -97,6 +97,12 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
}
$dir = $this->generateDirectoryPath($config);
$dh = opendir($dir);
+ // Apparently, on some versions of PHP, readdir will return
+ // an empty string if you pass an invalid argument to readdir.
+ // So you need this test. See #49.
+ if (false === $dh) {
+ return false;
+ }
while (false !== ($filename = readdir($dh))) {
if (empty($filename)) {
continue;
@@ -106,6 +112,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
}
unlink($dir . '/' . $filename);
}
+ closedir($dh);
+ return true;
}
/**
@@ -119,6 +127,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
}
$dir = $this->generateDirectoryPath($config);
$dh = opendir($dir);
+ // See #49 (and above).
+ if (false === $dh) {
+ return false;
+ }
while (false !== ($filename = readdir($dh))) {
if (empty($filename)) {
continue;
@@ -131,6 +143,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
unlink($dir . '/' . $filename);
}
}
+ closedir($dh);
+ return true;
}
/**
@@ -186,11 +200,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
if ($result !== false) {
// set permissions of the new file (no execute)
$chmod = $config->get('Cache.SerializerPermissions');
- if (!$chmod) {
- $chmod = 0644; // invalid config or simpletest
+ if ($chmod !== null) {
+ chmod($file, $chmod & 0666);
}
- $chmod = $chmod & 0666;
- chmod($file, $chmod);
}
return $result;
}
@@ -204,8 +216,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
{
$directory = $this->generateDirectoryPath($config);
$chmod = $config->get('Cache.SerializerPermissions');
- if (!$chmod) {
- $chmod = 0755; // invalid config or simpletest
+ if ($chmod === null) {
+ // TODO: This races
+ if (is_dir($directory)) return true;
+ return mkdir($directory);
}
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
@@ -219,15 +233,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
} elseif (!$this->_testPermissions($base, $chmod)) {
return false;
}
- mkdir($directory, $chmod);
- if (!$this->_testPermissions($directory, $chmod)) {
+ if (!mkdir($directory, $chmod)) {
trigger_error(
- 'Base directory ' . $base . ' does not exist,
- please create or change using %Cache.SerializerPath',
+ 'Could not create directory ' . $directory . '',
E_USER_WARNING
);
return false;
}
+ if (!$this->_testPermissions($directory, $chmod)) {
+ return false;
+ }
} elseif (!$this->_testPermissions($directory, $chmod)) {
return false;
}
@@ -256,7 +271,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
);
return false;
}
- if (function_exists('posix_getuid')) {
+ if (function_exists('posix_getuid') && $chmod !== null) {
// POSIX system, we can give more specific advice
if (fileowner($dir) === posix_getuid()) {
// we can chmod it ourselves
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php
index fef9b58906..b94f175423 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php
@@ -101,6 +101,14 @@ class HTMLPurifier_Encoder
* It will parse according to UTF-8 and return a valid UTF8 string, with
* non-SGML codepoints excluded.
*
+ * Specifically, it will permit:
+ * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
+ * Source: https://www.w3.org/TR/REC-xml/#NT-Char
+ * Arguably this function should be modernized to the HTML5 set
+ * of allowed characters:
+ * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+ * which simultaneously expand and restrict the set of allowed characters.
+ *
* @param string $str The string to clean
* @param bool $force_php
* @return string
@@ -122,15 +130,12 @@ class HTMLPurifier_Encoder
* function that needs to be able to understand UTF-8 characters.
* As of right now, only smart lossless character encoding converters
* would need that, and I'm probably not going to implement them.
- * Once again, PHP 6 should solve all our problems.
*/
public static function cleanUTF8($str, $force_php = false)
{
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do PHP stuff. 99% of the time, this will be the case.
- // The regexp matches the XML char production, as well as well as excluding
- // non-SGML codepoints U+007F to U+009F
if (preg_match(
'/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
$str
@@ -255,6 +260,7 @@ class HTMLPurifier_Encoder
// 7F-9F is not strictly prohibited by XML,
// but it is non-SGML, and thus we don't allow it
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
+ (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
)
) {
diff --git a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php
index 61529dcd9d..c372b5a6a6 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php
@@ -16,6 +16,138 @@ class HTMLPurifier_EntityParser
*/
protected $_entity_lookup;
+ /**
+ * Callback regex string for entities in text.
+ * @type string
+ */
+ protected $_textEntitiesRegex;
+
+ /**
+ * Callback regex string for entities in attributes.
+ * @type string
+ */
+ protected $_attrEntitiesRegex;
+
+ /**
+ * Tests if the beginning of a string is a semi-optional regex
+ */
+ protected $_semiOptionalPrefixRegex;
+
+ public function __construct() {
+ // From
+ // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
+ $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
+
+ // NB: three empty captures to put the fourth match in the right
+ // place
+ $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
+
+ $this->_textEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ "($semi_optional)".
+ ')/';
+
+ $this->_attrEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ // don't match if trailing is equals or alphanumeric (URL
+ // like)
+ "($semi_optional)(?![=;A-Za-z0-9])".
+ ')/';
+
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * textual data in an HTML document (as opposed to attributes.)
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteTextEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_textEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * attribute contents in documents.
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteAttrEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_attrEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Callback function for substituteNonSpecialEntities() that does the work.
+ *
+ * @param array $matches PCRE matches array, with 0 the entire match, and
+ * either index 1, 2 or 3 set with a hex value, dec value,
+ * or string (respectively).
+ * @return string Replacement string.
+ */
+
+ protected function entityCallback($matches)
+ {
+ $entity = $matches[0];
+ $hex_part = @$matches[1];
+ $dec_part = @$matches[2];
+ $named_part = empty($matches[3]) ? @$matches[4] : $matches[3];
+ if ($hex_part !== NULL && $hex_part !== "") {
+ return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
+ } elseif ($dec_part !== NULL && $dec_part !== "") {
+ return HTMLPurifier_Encoder::unichr((int) $dec_part);
+ } else {
+ if (!$this->_entity_lookup) {
+ $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+ }
+ if (isset($this->_entity_lookup->table[$named_part])) {
+ return $this->_entity_lookup->table[$named_part];
+ } else {
+ // exact match didn't match anything, so test if
+ // any of the semicolon optional match the prefix.
+ // Test that this is an EXACT match is important to
+ // prevent infinite loop
+ if (!empty($matches[3])) {
+ return preg_replace_callback(
+ $this->_semiOptionalPrefixRegex,
+ array($this, 'entityCallback'),
+ $entity
+ );
+ }
+ return $entity;
+ }
+ }
+ }
+
+ // LEGACY CODE BELOW
+
/**
* Callback regex string for parsing entities.
* @type string
@@ -144,7 +276,7 @@ class HTMLPurifier_EntityParser
$entity;
} else {
return isset($this->_special_ent2dec[$matches[3]]) ?
- $this->_special_ent2dec[$matches[3]] :
+ $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
$entity;
}
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php
index 08e62c16bf..66f70b0fc0 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -95,7 +95,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
if ($tidy !== null) {
$this->_tidy = $tidy;
}
- $html = preg_replace_callback('##isU', array($this, 'styleCallback'), $html);
+ // NB: this must be NON-greedy because if we have
+ //
+ // we must not grab foo